Important note for current users!
Effective May 5, 2026, DataForSEO v2 will no longer be supported. Learn more>>
OnPage API checks websites for 60+ on-page parameters, identifies and displays all detected flaws and optimization opportunities so that you can easily fix them. It checks meta tags, duplicated content, image tags, response codes, and other parameters on every page. You can find the full list of OnPage API check-up parameters in the page result fields section.
Setting Tasks
Using this function, you can set tasks for scanning a website. After a task has been set, it goes through three stages of completion, each of which is shown in the status field of the task. First, the task is queued (status="in_queue"). Second, the website is crawled (status="crawling"). At the final stage, when the scanning process is finished, the task gets status="crawled". After the task is completed (status="crawled"), you will be able to use the analysis functions to work with the received results. The current status of the task can be found in the results of the Get Tasks Status function. The results of the completed task will be available for 30 days from the day of its completion.
Task completion time depends on many factors: the number of scanned pages (the crawl_max_pages field that you must specify when setting a task), the response time of the server where the website is located, the volume of the pages being analyzed, etc.
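For orientation, the whole lifecycle can be sketched in a few lines of Python with the same RestClient used in the examples below. This is only a sketch: the endpoint paths and status values are the ones documented on this page, while the 10-second polling interval is arbitrary, and it assumes the POST results field is keyed by your post_data index and the GET results field is an array (see the field tables below).
import time
from client import RestClient #the same helper client as in the examples below

client = RestClient("login", "password")
post_data = {"1": dict(site="ranksonic.com", crawl_max_pages=10)}
post_result = client.post("/v2/op_tasks_post", dict(data=post_data))
task_id = post_result["results"]["1"]["task_id"] #results are keyed by the post_data index
status = None
while status != "crawled": #poll until the crawl finishes
    time.sleep(10) #an arbitrary polling interval
    task = client.get("/v2/op_tasks_get/%d" % task_id)
    status = task["results"][0]["status"]
print(task["results"][0]["summary"]) #described in Get Task Result Summary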
Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/#api_dashboard
<?php
require('RestClient.php');
//You can download this file from here https://api.dataforseo.com/_examples/php/_php_RestClient.zip
try {
$client = new RestClient('https://api.dataforseo.com/', null, 'login', 'password');
} catch (RestClientException $e) {
echo "\n";
print "HTTP code: {$e->getHttpCode()}\n";
print "Error code: {$e->getCode()}\n";
print "Message: {$e->getMessage()}\n";
print $e->getTraceAsString();
echo "\n";
exit();
}
$post_array = array();
$my_unq_id = mt_rand(0,30000000); //your unique ID (like your DB "id" field, type 'string'); it will be returned with all results
$post_array[$my_unq_id] = array(
"site" => "ranksonic.com",
"crawl_max_pages" => 10
);
try {
// POST /v2/op_tasks_post/$data
// $tasks_data must be an array with the key 'data'
$task_post_result = $client->post("v2/op_tasks_post", array('data' => $post_array));
print_r($task_post_result);
//do something with post results
} catch (RestClientException $e) {
echo "\n";
print "HTTP code: {$e->getHttpCode()}\n";
print "Error code: {$e->getCode()}\n";
print "Message: {$e->getMessage()}\n";
print $e->getTraceAsString();
echo "\n";
}
$client = null;
?>
from random import Random
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip
#Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
client = RestClient("login", "password")
rnd = Random() #as the index of post_data you can use your own ID, a string, etc.; it will be returned with all results.
post_data = dict()
post_data[rnd.randint(1, 30000000)] = dict(
site="ranksonic.com",
crawl_max_pages=10
)
response = client.post("/v2/op_tasks_post", dict(data=post_data))
if response["status"] == "error":
print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
print(response["results"])
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace DataForSeoDemos
{
public static partial class Demos
{
public static async Task op_tasks_post()
{
var httpClient = new HttpClient
{
BaseAddress = new Uri("https://api.dataforseo.com/"),
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
};
var rnd = new Random(); //as the index of postObject you can use your own ID, a string, etc.; it will be returned with all results.
var postObject = new Dictionary<int, object>
{
[rnd.Next(1, 30000000)] = new
{
site = "ranksonic.com",
crawl_max_pages = 10
}
};
var taskPostResponse = await httpClient.PostAsync("v2/op_tasks_post", new StringContent(JsonConvert.SerializeObject(new { data = postObject }), Encoding.UTF8, "application/json"));
dynamic obj = JsonConvert.DeserializeObject(await taskPostResponse.Content.ReadAsStringAsync());
if (obj.status == "error")
Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
else
{
foreach (var result in obj.results)
{
var taskState = ((IEnumerable<dynamic>)result).First();
if (taskState.status == "error")
Console.WriteLine($"Error in task with post_id {taskState.post_id}. Code: {taskState.error.code} Message: {taskState.error.message}");
Console.WriteLine(taskState);
}
}
}
}
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;
public class Demos {
public static void op_tasks_post() throws JSONException, IOException, URISyntaxException {
URI url = new URI("https://api.dataforseo.com/v2/op_tasks_post");
HttpClient client = HttpClientBuilder.create().build();
HttpPost post = new HttpPost(url);
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));
Map<Integer, Map<String, Object>> postValues = new HashMap<>();
Random rnd = new Random();
Map<String, Object> postObj = new HashMap<>();
postObj.put("site", "ranksonic.com");
postObj.put("crawl_max_pages", 10);
postValues.put(rnd.nextInt(30000000), postObj);
JSONObject json = new JSONObject().put("data", postValues);
StringEntity input = new StringEntity(json.toString());
input.setContentType("application/json");
post.setHeader("Content-type", "application/json");
post.setHeader("Authorization", "Basic " + basicAuth);
post.setEntity(input);
HttpResponse taskPostResponse = client.execute(post);
JSONObject taskPostObj = new JSONObject(EntityUtils.toString(taskPostResponse.getEntity()));
if (taskPostObj.get("status").equals("error")) {
System.out.println("error. Code:" + taskPostObj.getJSONObject("error").get("code") + " Message:" + taskPostObj.getJSONObject("error").get("message"));
} else {
JSONObject results = taskPostObj.getJSONObject("results");
Iterator<String> jkeys = results.keys();
while (jkeys.hasNext()) {
String key = jkeys.next();
String status = "";
if (!results.getJSONObject(key).isNull("status")) {
status = results.getJSONObject(key).get("status").toString();
}
if (status.equals("error"))
System.out.println("Error in task with post_id " + results.getJSONObject(key).get("post_id") + ". Code: " + results.getJSONObject(key).getJSONObject("error").get("code") + " Message: " + results.getJSONObject(key).getJSONObject("error").get("message"));
else {
System.out.println(results.getJSONObject(key).toString());
}
}
}
}
}
The above command returns JSON structured like this:
If the number of crawled pages is lower than the specified crawl_max_pages parameter, the remaining credits will be refunded
All POST data should be sent in the JSON format (UTF-8 encoding). Tasks are set using the POST method by sending the array of tasks in the data field. Each of the array elements has the following structure:
Field name
Type
Description
site
string
site
required field
crawl_max_pages
integer
the maximum number of test pages
required field
credits will be withdrawn on the basis of this parameter
if the number of crawled pages is lower than this parameter, the remaining credits will be refunded.
crawl_max_depth
integer
crawl depth
optional field
crawl depth of the website. For example: the homepage is level 0, links from the homepage are level 1, etc. Unique links are taken into account (for example, links from level 1 will be reflected as level 0).
default value: 0.
crawl_delay
float
delay between queries, sec
optional field
this parameter enables adjusting the frequency of queries to the server in order to reduce the load and avoid a DDoS-like effect.
default value: 2
cookies_use
integer
usage of cookies when a website is being scanned
optional field
can take the values: 0 – no, 1 – yes.
default value: 1.
robots
string
user robots.txt
optional field
You can set up your robots.txt for this crawling task.
robots_mode
string
merge mode with the robots.txt of the website
optional field
can take values: ‘merge’, ‘override’.
default value: ‘merge’.
string_search_containment
string
presence of the text on the page
optional field
the result of the search will be shown in the string_containment_check field.
default value: ‘null’.
pingback_url
string
notification URL of the completed task
optional field
when the task is completed we will notify you by sending the GET request to the URL you have specified
you can use the ‘$task_id’ string as a $task_id variable and the ‘$post_id’ string as a $post_id variable. We will set the necessary values before sending the request. For example:
http://your-server.com/pingscript?taskId=$task_id
http://your-server.com/pingscript?taskId=$task_id&postId=$post_id
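As an illustration, a minimal pingback receiver could look like the Python sketch below. The taskId and postId parameter names match the example URLs above; the port and everything else are assumptions for demonstration only.
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import urlparse, parse_qs

class PingbackHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        #parse the query string of the GET request we receive from the API
        params = parse_qs(urlparse(self.path).query)
        task_id = params.get("taskId", [None])[0]
        post_id = params.get("postId", [None])[0]
        print("task %s (post_id %s) is crawled and its results can be fetched" % (task_id, post_id))
        self.send_response(200)
        self.end_headers()

HTTPServer(("", 8080), PingbackHandler).serve_forever() #port 8080 is an arbitrary choice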
In response, the API server returns a JSON array in the results field where you will find information about the tasks you have set.
Field name
Type
Description
status
string
general result
“ok” – successful
“error” – error
if status=“error”, check the error array for more details
error
array
informational array of the error only if status=“error”
the list of possible errors can be found below.
code
integer
error code
message
string
text description of the error
results_time
string
execution time, seconds
results_count
string
the number of elements in the results array
results
array
results array of task setting
post_id
string
index in the array received in the POST request
post_site
string
site received in the POST request
task_id
integer
unique task identifier in our system (UInt64)
you will be able to use it to request the results of this task at any time within the next 30 days.
status
string
results of this task setting
“ok” – successful
“error” – error
if status=“error”, check the error array for more details
error
array
informational array of the error only if status=“error”
the list of possible errors can be found below.
code
integer
error code
message
string
text description of the error
Possible errors codes
Error Code
Meaning
404
“not found or not enough data: site” – you didn’t specify a website in the task
404
“not found or not enough data: crawl_max_pages” – you didn’t specify the crawl_max_pages field in the task
501
“invalid ‘data’ field” – probably you haven’t passed data for the tasks in the data field. POST data should be represented as an array and added to the data field: array(‘data’ => $post_array_for_tasks)
501
“invalid data” – data in the data field isn’t an array with the required structure
500
“internal error” – some internal error. We did our best to avoid this type of error
Get Tasks Status
Using this function, you can get the current status of task completion. If a task has status="crawled", you can use the analysis functions to work with the received results.
Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/#api_dashboard
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip
client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get")
if response["status"] == "error":
print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
print(response["results"])
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
using Newtonsoft.Json;
namespace DataForSeoDemos
{
public static partial class Demos
{
public static async Task op_tasks_get()
{
var httpClient = new HttpClient
{
BaseAddress = new Uri("https://api.dataforseo.com/"),
DefaultRequestHeaders = {Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password")))}
};
var response = await httpClient.GetAsync("v2/op_tasks_get");
dynamic obj = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync());
if (obj.status == "error")
Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
else if (obj.results_count != 0)
{
foreach (var result in obj.results)
{
var resultItem = ((IEnumerable<dynamic>) result).First();
Console.WriteLine(resultItem);
}
}
else
Console.WriteLine("no results");
}
}
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;
public class Demos {
public static void op_tasks_get() throws JSONException, IOException, URISyntaxException {
URI url = new URI("https://api.dataforseo.com/v2/op_tasks_get");
HttpClient client;
client = HttpClientBuilder.create().build();
HttpGet get = new HttpGet(url);
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));
get.setHeader("Content-type", "application/json");
get.setHeader("Authorization", "Basic " + basicAuth);
HttpResponse completedTasksResponse = client.execute(get);
JSONObject completedTasksObj = new JSONObject(EntityUtils.toString(completedTasksResponse.getEntity()));
if (completedTasksObj.get("status").equals("error")) {
JSONObject errorObj = completedTasksObj.getJSONObject("error");
System.out.println("error. Code: " + errorObj.get("code") + " Message: " + errorObj.get("message"));
} else if (!completedTasksObj.get("results_count").equals(0)) {
JSONArray results = completedTasksObj.getJSONArray("results");
for (int i = 0; i < results.length(); i++) {
System.out.println(results.getJSONObject(i));
}
} else {
System.out.println("no results");
}
}
}
The above command returns JSON structured like this:
You can receive the status of your tasks in two different ways:
GET https://api.dataforseo.com/v2/op_tasks_get
you will receive the status of all your tasks.
If you specified a pingback_url when setting a task (see Setting Tasks), we will send a GET request to the URL you specified as pingback_url as soon as the task is completed.
The API server returns an array in the results field where you will find the results.
Field name
Type
Description
status
string
general result
“ok” – successful
“error” – error
if status=“error”, check the error array for more details
error
array
informational array of error only if status=“error”
code
integer
error code
message
string
text description of the error
results_time
string
execution time, seconds
results_count
string
number of elements in the results array
results
array
results array
post_id
string
index in the array received in the POST array
post_site
string
site received in the POST array
task_id
integer
unique task identifier in our system (UInt64)
you will be able to use it to request the results of this task at any time within the next 30 days
string_search_containment
string
string_search_containment received in the POST request
default value: ‘null’.
crawl_max_pages
integer
the maximum number of test pages
crawl_start
string
date and time of the start of crawling
in the format year-month-day hours:minutes:seconds +time_zone
for example: ‘2017-12-14 11:50:01 +02:00’
crawl_end
string
date and time of the end of crawling
in the format year-month-day hours:minutes:seconds +time_zone
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning – the value of this field will be ‘null’
status
string
current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
if the task status is “crawled”, you can get the results for this task
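To illustrate the status values above, here is a short Python sketch (same RestClient as in the examples) that separates finished tasks from pending ones; the field names are taken from the table above.
from client import RestClient

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get")
if response["status"] == "ok":
    for task in response["results"]:
        if task["status"] == "crawled": #results for this task can now be requested
            print("task %d for %s is ready" % (task["task_id"], task["post_site"]))
        else:
            print("task %d is still %s" % (task["task_id"], task["status"]))
else:
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))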
Get Task Result Summary
Using this function, you can get the overall information about a website. This information allows you to detect the exact on-page issues of a website that has been scanned. As a result, you will know which functions to use for receiving the detailed data for each of the found problems.
The data for this function will be available only after the scanning is over (status="crawled"). If a task is still being processed, you will only see the data that was collected up to the moment you check.
Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/#api_dashboard
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip
client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get/123456789")
if response["status"] == "error":
print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace DataForSeoDemos
{
public static partial class Demos
{
public static async Task op_tasks_get_by_task_id()
{
var httpClient = new HttpClient
{
BaseAddress = new Uri("https://api.dataforseo.com/"),
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
};
var taskid = 123456789;
var response = await httpClient.GetAsync($"v2/op_tasks_get/{taskid}");
dynamic obj = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync());
if (obj.status == "error")
Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
else if (obj.results_count != 0)
{
foreach (var result in obj.results)
{
Console.WriteLine(result);
}
}
else
Console.WriteLine("no results");
}
}
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;
public class Demos {
public static void op_tasks_get_by_task_id() throws JSONException, IOException {
HttpClient client;
client = HttpClientBuilder.create().build();
int taskId = 123456789;
HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_get/" + taskId);
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));
get.setHeader("Content-type", "application/json");
get.setHeader("Authorization", "Basic " + basicAuth);
HttpResponse response = client.execute(get);
JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));
if (obj.get("status").equals("error")) {
System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
} else {
JSONArray results = obj.getJSONArray("results");
if (results.length() > 0) {
for (int i = 0; i < results.length(); i++) {
System.out.println(results.get(i));
}
} else {
System.out.println("no results");
}
}
}
}
The above command returns JSON structured like this:
The API server returns an array in the results field where you will find the results.
Field name
Type
Description
status
string
general result
“ok” – successful
“error” – error
if status=“error”, check the error array for more details
error
array
the informational array of the error only if status=“error”
code
integer
error code
message
string
text description of the error
results_time
string
execution time, seconds
results_count
string
the number of elements in the results array
results
array
results array
post_id
string
index in the array received in the POST array
post_site
string
site received in the POST array
task_id
integer
unique task identifier in our system (UInt64)
you will be able to use it to request the results of this task at any time within 30 days
string_search_containment
string
string_search_containment received in the POST request
default value: ‘null’.
crawl_max_pages
integer
the maximum number of test pages
crawl_start
string
date and time of the start of crawling
in the format year-month-day hours:minutes:seconds +time_zone
for example: ‘2017-12-14 11:50:01 +02:00’
crawl_end
string
date and time of the end of crawling
in the format year-month-day hours:minutes:seconds +time_zone
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning, the value of this field will be ‘null’
status
string
current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
if the task status is “crawled”, you will get the result in the summary array
summary
array
composite result
absent_doctype
integer
number of pages without <!DOCTYPE html>
absent_encoding_meta_tag
integer
the number of pages without <meta charset=...>, but only if the encoding is not explicitly specified in the Content-Type header (for example Content-Type: "text/html; charset=utf8")
absent_h1_tags
integer
the number of pages without H1
only for canonical pages
canonical_another
integer
number of pages with a canonical link pointing to another page
only for pages with 200 response code
canonical_recursive
integer
number of pages with recursive canonicals
cms
string
the content of the generator meta tag
the data is taken from the first random page that returns the 200 response code
compression_disabled
integer
number of pages without enabled gzip or deflate compression
only for pages with the 200 response code
content_invalid_rate
integer
number of pages whose (plaintext size / page size) ratio is less than 0.1 or more than 0.9
the data is available only for canonical pages
content_invalid_size
integer
number of pages whose plain text size is less than 1024 bytes or more than 256 kbytes
the data is available only for canonical pages
crawl_end
string
the date and time of the end of crawling
in the format year-month-day hours:minutes:seconds +time_zone
for example: ‘2017-12-13 15:30:34 +00:00’
crawl_start
string
the date and time of the start of crawling
in the format year-month-day hours:minutes:seconds +time_zone
for example: ‘2017-12-14 11:50:01 +00:00’
deprecated_html_tags
integer
the number of pages with deprecated html tags
the data is available only for canonical pages
more info: list of deprecated tags
domain
string
root domain without subdomains
for example: if ‘blog.example.com’ is checked, its value would be ‘example.com’
duplicate_meta_descriptions
integer
the number of pages with duplicate meta description
only for canonical pages
duplicate_meta_tags
integer
the number of pages with 2 or more meta tags of the same type
only for canonical pages
duplicate_pages
integer
the number of pages with duplicate content
only for canonical pages
duplicate_titles
integer
the number of pages with duplicate content of the <title> tag
only for canonical pages
favicon_invalid
integer
the number of pages that don’t contain rel="icon" link
the data is available only for canonical pages
have_robots
boolean
presence of robots.txt
have_sitemap
boolean
presence of sitemap.xml
images_invalid_alt
integer
the number of pages that have at least one image with an empty or absent alt attribute of <img> tag
the data is available only for canonical pages
images_invalid_title
integer
the number of pages that have at least one image with an empty or absent title attribute of the <img> tag
the data is available only for canonical pages
ip
string
IP address of the website
links_broken
integer
the number of pages that contain at least one broken link
a link is considered broken if it leads to a page whose response code is >=400 and <500
the data is available for all pages
links_external
integer
the total number of external links
the data is available for all pages
links_internal
integer
the total number of internal links
the data is available for all pages
meta_description_empty
integer
the number of pages with an empty or absent description meta tag
the data is available only for canonical pages
meta_description_inappropriate
integer
the number of pages with description tags that are irrelevant to the content of a page (only for canonical pages)
the relevance threshold is 0.2
the data is not available for pages that don’t have the description tag
meta_keywords_empty
integer
the number of pages with empty keywords in meta tags
the data is available only for canonical pages
meta_keywords_inappropriate
integer
the number of pages with keywords tag that is irrelevant to the content of the page (only for canonical pages)
the relevance threshold is 0.6
the data is not available for pages that don’t have the keywords tag
pages_broken
integer
the number of pages whose response code is >=400 or <200
the data is available for all pages
pages_http
integer
the number of pages with the HTTP protocol
the data is available for all pages
pages_https
integer
the number of pages with the HTTPS protocol
the data is available for all pages
pages_invalid_size
integer
the number of pages with the page size less than 1024 bytes or more than 256 kbytes
the data is available only for canonical pages
pages_non_www
integer
the number of pages without subdomain “www”
the data is available for all pages
pages_total
integer
the total number of scanned HTML pages
pages_with_flash
integer
the number of pages with flash elements
the data is available for all pages
pages_with_frame
integer
the number of pages that contain frame, iframe, frameset tags
the data is available for all pages
pages_with_lorem_ipsum
integer
the number of pages that probably contain ‘lorem ipsum’
the data is available for all pages
pages_www
integer
the number of pages with subdomain “www”
the data is available for all pages
response_code_1xx
integer
the number of pages whose response code is >=100 and <200
the data is available for all pages
response_code_2xx
integer
the number of pages whose response code is >=200 and <300
the data is available for all pages
response_code_3xx
integer
the number of pages whose response code is >=300 and <400
the data is available for all pages
response_code_4xx
integer
the number of pages whose response code is >=400 and <500
the data is available for all pages
response_code_5xx
integer
the number of pages whose response code is >=500 and <600
the data is available for all pages
response_code_other
integer
the number of pages whose response code is >=600 or <100
this number also includes pages whose response code could not be retrieved
the data is available for all pages
seo_friendly_url
integer
the number of pages with an ‘SEO-friendly URL’
the ‘SEO-friendliness’ of a page URL is checked by four parameters:
– the length of the relative path is less than 120 symbols
– no special characters
– no dynamic parameters
– relevance of the URL to the page
if at least one of these checks fails, the URL is considered not ‘SEO-friendly’
the data is available only for canonical pages
seo_non_friendly_url
integer
the number of pages that don’t have an ‘SEO-friendly URL’
the ‘SEO-friendliness’ of a page URL is checked by four parameters:
– the length of the relative path is less than 120 symbols
– no special characters
– no dynamic parameters
– relevance of URL to the page
if at least one of these checks fails, the URL is considered not ‘SEO-friendly’
the data is available only for canonical pages
server
string
the content of the server header
the information is taken from the first page whose response code is 200
ssl
boolean
the usage of the secure SSL protocol
true – if there is at least one HTTPS page
the relevant fields below will contain data only if ssl = true
the information about the certificate is taken from the first page that has HTTPS
ssl_certificate_expiration
string
expiration date and time of the SSL certificate
in the format year-month-day hours:minutes:seconds +time_zone
for example: ‘2017-12-25 05:10:34 +00:00’
ssl_certificate_hash_algorithm
string
encryption algorithm of the SSL certificate
if the website does not support SSL (ssl=false), the value is always empty
ssl_certificate_issuer
string
issuer of the SSL certificate
if the website does not support SSL (ssl=false), the value is always empty
ssl_certificate_subject
string
subject of the SSL certificate
if the website does not support SSL (ssl=false), the value is always empty
ssl_certificate_valid
boolean
validity of the SSL certificate
if the website does not support SSL (ssl=false), the value is always ‘false’
ssl_certificate_x509_version
integer
version of the SSL certificate
if the website does not support SSL (ssl=false), the value is always empty
start_page_has_deny_flag
boolean
the checkup of the possibility to scan the start page of a website
true – if scanning of the start page is disallowed in robots.txt
string_containment_check
integer
the number of pages that contain text specified in the string_search_containment field
test_canonicalization
integer
the checkup of the server behavior when our crawler tries to access the website via its IP address
the field contains the status code of the server response
normally, a server returns a 301 response code
test_directory_browsing
boolean
the checkup of the possibility to access the content directory of a website
some web servers may expose the contents of their directories
the checkup is conducted if the website has at least one page whose response code is 200
test_server_signature
boolean
the checkup of the Server header
if the version is specified along with the server name, the test is considered failed
knowing the version of the server, an attacker can exploit vulnerabilities specific to that version to attack the site
the test is conducted after the information about the Server header is received
test_trash_page
integer
the checkup of the website behavior when the crawler requests a non-existent page
the field contains the status code of the server response
normally, a server returns a 404 response code
time_load_high
integer
the number of pages with the loading time of more than 3 seconds
the data is available for all pages
time_waiting_high
integer
the number of pages with a waiting time (time spent waiting for the initial response, also known as Time To First Byte) of more than 1.5 seconds
the data is available for all pages
title_duplicate_tag
integer
the number of pages with more than one <title> tag on the page
the data is available only for canonical pages
title_empty
integer
the number of pages with an empty or absent <title> tag
the data is available only for canonical pages
title_inappropriate
integer
the number of pages with <title> tags that are irrelevant to the content of the page (only for canonical pages)
the relevance threshold is 0.3
the data is not available for the pages that don’t have <title> tag
title_long
integer
the number of pages with too long <title> tags (longer than 65 characters)
the maximum number of displayed characters is 255; if the title is longer, only the first 255 characters will be shown
the data is available only for canonical pages
title_short
integer
the number of pages with too short <title> tags (shorter than 30 characters)
the data is available only for canonical pages
www
boolean
usage of subdomain www
true – if there is at least one page on the ‘www’ subdomain whose response code is 200
‘www’ is the only subdomain that is parsed by our crawler within the specified domain
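As a sketch of how the summary counters above can be used, the Python snippet below (same RestClient; the task id is a placeholder) prints a few of the most common issue counts. It assumes the summary array is returned as an object keyed by the field names above.
from client import RestClient

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get/123456789") #placeholder task id
if response["status"] == "ok" and response["results"]:
    result = response["results"][0]
    if result["status"] == "crawled": #summary is only complete after crawling is over
        summary = result["summary"]
        for field in ("duplicate_titles", "links_broken", "pages_broken", "seo_non_friendly_url", "time_load_high"):
            print("%s: %s" % (field, summary[field]))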
Get Pages
Using this function, you can retrieve structured data for each page of a website that has been scanned. To get a list of pages based on parameters you set, use the extended Get Filtered Pages function.
All results for this function will be available only after the scanning is over (status="crawled"). If the task is still being processed, you will only see the data that was collected up to the moment you check.
Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/#api_dashboard
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip
client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_pages/123456789")
if response["status"] == "error":
print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace DataForSeoDemos
{
public static partial class Demos
{
public static async Task op_tasks_get_pages()
{
var httpClient = new HttpClient
{
BaseAddress = new Uri("https://api.dataforseo.com/"),
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
};
var taskid = 123456789;
var response = await httpClient.GetAsync($"v2/op_tasks_get_pages/{taskid}");
dynamic obj = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync());
if (obj.status == "error")
Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
else if (obj.results_count != 0)
{
foreach (var result in obj.results)
{
Console.WriteLine(result);
}
}
else
Console.WriteLine("no results");
}
}
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;
public class Demos {
public static void op_tasks_get_pages() throws JSONException, IOException {
HttpClient client;
client = HttpClientBuilder.create().build();
int taskId = 123456789;
HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_get_pages/" + taskId);
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));
get.setHeader("Content-type", "application/json");
get.setHeader("Authorization", "Basic " + basicAuth);
HttpResponse response = client.execute(get);
JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));
if (obj.get("status").equals("error")) {
System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
} else {
JSONArray results = obj.getJSONArray("results");
if (results.length() > 0) {
for (int i = 0; i < results.length(); i++) {
System.out.println(results.get(i));
}
} else {
System.out.println("no results");
}
}
}
}
The above command returns JSON structured like this:
The API server returns an array in the results field where you will find the results.
Field name
Type
Description
status
string
general result
“ok” – successful
“error” – error
if status=“error”, then you can see more detailed information in the error array
error
array
informational array of the error only if status=“error”
code
integer
error code
message
string
text description of the error
results_time
string
execution time, seconds
results_count
string
number of elements in the results array
results
array
results array
post_id
string
index in the array received in a POST array
post_site
string
site received in a POST array
task_id
integer
unique task identifier in our system (UInt64)
you will be able to use it to request the results of this task at any time within the next 30 days
string_search_containment
string
string_search_containment received in a POST request
default value: ‘null’.
crawl_max_pages
integer
the maximum number of test pages
crawl_start
string
date and time of the start of crawling
in the format year-month-day hours:minutes:seconds +time_zone
for example: ‘2017-12-14 11:50:01 +02:00’
crawl_end
string
date and time of the end of crawling
in the format year-month-day hours:minutes:seconds +time_zone
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning – the value of this field will be ‘null’
status
string
current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
content_count_words
integer
number of words in the content of the page
the text within the body tag is parsed; text inside script, style, a, noscript, select, button, embed, frameset tags, as well as inside comments, is ignored
content_encoding
string
compression algorithm of the page content
date and time of the end of crawling
in the format year-month-day hours:minutes:seconds +time_zone
for example: ‘2017-12-13 15:30:34 +00:00’
crawled
boolean
whether the page has been crawled
deprecated_html_tags
array
array of deprecated html tags of the page
duplicate_meta_tags
array
array of meta tags that are duplicated
favicon
string
favicon of the page
h1_count
integer
count of H1 tags
h2_count
integer
count of H2 tags
h3_count
integer
count of H3 tags
have_deprecated_tags
boolean
presence of deprecated tags on the page
have_doctype
boolean
presence of <!DOCTYPE html> on the page
have_page_duplicates
boolean
presence of duplicate pages of the page
to get these pages you can call op_tasks_get_duplicates with parameter ‘page’
if you request the data during the scanning – the value of this field will be ‘null’
have_enc_meta_tag
boolean
presence of tag <charset> on the page
have_flash
boolean
presence of flash elements on the page
have_frame
boolean
presence of frames on the page
have_lorem_ipsum
boolean
presence of ‘lorem ipsum’ text on the page
have_meta_description_duplicates
boolean
there are pages whose meta tag description duplicates the description of this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘description’
have_recursive_canonical
boolean
presence of recursive canonical
if you request the data during the scanning – the value of this field will be ‘null’
have_redirect
boolean
presence of redirects on the page
if you request the data during the scanning – the value of this field will be ‘null’
have_title_duplicates
boolean
there are pages with duplicate content of tag <title>
to get these pages you can call op_tasks_get_duplicates with parameter ‘title’
images_count
integer
number of images on the page
images_invalid_alt
integer
number of images with an empty or missing alt attribute
images_invalid_title
integer
number of images with an empty or missing title attribute
links_broken
integer
number of broken links from the page
pages with 4xx response code will have 0 value in this field
if you request the data during the scanning – the value of this field will be ‘null’
links_external
integer
number of external links on the page
links_referring
integer
number of referring links to the page
links_internal
integer
number of internal links on the page
meta_description
string
content of meta tag description
meta_description_consistency
float
consistency of meta tag description with page content
from 0 to 1
meta_description_length
integer
length of meta tag description content
meta_keywords
string
content of meta tag keywords
meta_keywords_consistency
float
consistency of meta tag keywords with page content
from 0 to 1
page_allowed
boolean
page access is not disallowed by meta tag robots or X-Robots-Tag HTTP header
page_redirect
string
URL of the page to which the specified page redirects
the field is not empty only if the status code is 3xx
page_size
integer
page size in bytes
plain_text_rate
float
plaintext rate value (plain_text_size / page_size)
plain_text_size
integer
plain text size of the page, in characters
relative_path_length
integer
relative path length of the page URL
response_code
integer
HTTP response code
seo_friendly_url
boolean
page has an ‘SEO-friendly URL’
true if seo_friendly_url_characters_check=true and seo_friendly_url_dynamic_check=true and seo_friendly_url_keywords_check=true and seo_friendly_url_relative_length_check=true
seo_friendly_url_characters_check
boolean
checking for symbols in accordance with Google recommendations
only uppercase and lowercase Latin characters, digits and dashes are allowed
‘true’ – if the test is passed.
seo_friendly_url_dynamic_check
boolean
presence of dynamic parameters for a resource
like ‘https://example.com/some_url.php?adsasd=5’
if there are dynamic parameters in the URL, the value will be ‘false’
seo_friendly_url_keywords_check
boolean
consistency of page url with meta tag keywords
if the keywords tag is empty or absent, the URL is compared with the content of the <title> tag; if the title tag is also absent, the test is considered not passed
seo_friendly_url_relative_length_check
boolean
checking the length of the relative path
the URL should not be longer than 120 characters
ssl
boolean
usage of the secure SSL protocol
ssl_handshake_time
integer
time (in milliseconds) spent on the ‘SSL handshake’
string_containment_check
boolean
shows the presence or absence of the text specified in the string_search_containment field on the page
if no text is specified in the string_search_containment field, the string_containment_check field will have the ‘false’ value
time_connection
integer
time (in milliseconds) spent on establishing the connection
time_download
integer
time (in milliseconds) spent on the loading of resources
time_total_load
integer
total time: time_connection + time_sending_request + time_waiting + time_download + ssl_handshake_time
time_sending_request
integer
time (in milliseconds) spent on sending a request to a server
time_waiting
integer
time spent waiting for the initial response, also known as the Time To First Byte
title
string
content of tag <title>
title_consistency
float
consistency of tag <title> with page content
from 0 to 1
title_duplicate_tag
boolean
tag <title> is duplicated
(specified more than once within the same page)
title_length
integer
length of tag <title> content
www
boolean
usage of the subdomain www
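As an illustration of the per-page fields above, the Python sketch below (same RestClient; the task id is a placeholder) flags pages that fail the SEO-friendly URL check. It assumes each element of the results array is a page record carrying the fields above.
from client import RestClient

client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_pages/123456789") #placeholder task id
if response["status"] == "ok":
    for page in response["results"]:
        if page.get("seo_friendly_url") is False: #the page failed at least one of the four URL checks
            print("not SEO-friendly: title=%r, response_code=%s" % (page.get("title"), page.get("response_code")))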
Get Filtered Pages
Using this function, you can get a list of pages based on the parameters you specify. It is the primary function for getting pages with on-page errors. For instance, you can set the parameters to receive the list of pages with non-SEO-friendly URLs, the list of pages with too high loading time, the list of pages with a low readability score, etc.
All results for this function will be available only after the scanning is over (status="crawled"). If a task is still being processed, you will only see the data that was collected up to the moment you check.
Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/#api_dashboard
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip
client = RestClient("login", "password")
post_data = dict()
post_data = [
dict(
task_id=151668277,
limit=1000,
offset=0,
filters=[
["h1_count", "=", 0],
["content_count_words", ">", 200]
]
)
]
response = client.post("/v2/op_tasks_get_pages_filter", dict(data=post_data))
if response["status"] == "error":
print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace DataForSeoDemos
{
public static partial class Demos
{
public static async Task op_tasks_get_pages_filter()
{
var httpClient = new HttpClient
{
BaseAddress = new Uri("https://api.dataforseo.com/"),
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
};
var postObject = new[]
{
new
{
task_id = 123456789,
limit = 1000,
offset = 0,
filters = new[]
{
new object[] { "h1_count", ">", 0 },
new object[] { "content_count_words", ">", 100 }
}
}
};
var pagePostResponse = await httpClient.PostAsync("v2/op_tasks_get_pages_filter", new StringContent(JsonConvert.SerializeObject(new { data = postObject }), Encoding.UTF8, "application/json"));
dynamic obj = JsonConvert.DeserializeObject(await pagePostResponse.Content.ReadAsStringAsync());
if (obj.status == "error")
Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
else
{
foreach (var result in obj.results)
{
Console.WriteLine(result);
}
}
}
}
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;
public class Demos {
public static void op_tasks_get_pages_filter() throws JSONException, IOException, URISyntaxException {
URI url = new URI("https://api.dataforseo.com/v2/op_tasks_get_pages_filter");
HttpClient client = HttpClientBuilder.create().build();
HttpPost post = new HttpPost(url);
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));
Map<Integer, Map<String, Object>> postValues = new HashMap<>();
Random rnd = new Random();
Map<String, Object> postObj = new HashMap<>();
postObj.put("task_id", 151668277);
postObj.put("limit", 1000);
postObj.put("offset", 0);
postObj.put("filters", new Object[]{
new Object[]{"h1_count", "=", 0},
new Object[]{"content_count_words", ">", 200}
});
postValues.put(rnd.nextInt(30000000), postObj);
JSONObject json = new JSONObject().put("data", postValues);
StringEntity input = new StringEntity(json.toString());
input.setContentType("application/json");
post.setHeader("Content-type", "application/json");
post.setHeader("Authorization", "Basic " + basicAuth);
post.setEntity(input);
HttpResponse pagePostResponse = client.execute(post);
JSONObject obj = new JSONObject(EntityUtils.toString(pagePostResponse.getEntity()));
if (obj.get("status").equals("error")) {
System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message:" + obj.getJSONObject("error").get("message"));
} else {
JSONArray results = obj.getJSONArray("results");
if (results.length() > 0) {
for (int i = 0; i < results.length(); i++) {
if (results.getJSONObject(i).get("status").equals("error"))
System.out.println("Error in task with post_id " + results.getJSONObject(i).get("post_id") + ". Code: " + results.getJSONObject(i).getJSONObject("error").get("code") + " Message: " + results.getJSONObject(i).getJSONObject("error").get("message"));
else {
System.out.println(results.get(i));
}
}
} else {
System.out.println("no results");
}
}
}
}
The above command returns JSON structured like this:
All POST data should be sent in the JSON format (UTF-8 encoding). The page filtering request is done using the POST method, with the array of filtering requests sent in the data field. Each of the array elements has the following structure:
Field name
Type
Description
task_id
integer
unique identifier returned to you in the response from our service when you set a task
required field
limit
integer
maximum number of returned pages
offset
integer
offset in results array of returned pages
filters
array
array with filters
required field
$field
string
the name of the filtered field
required field
the list of all available fields can be found in the results of Get Pages
$operator
string
comparison operator
required field
available operators: >, =, !=, <>, <, <=, >=, contains, notcontains, startswith, endswith
$value
string
comparison value
required field
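To complement the example above, here are two more filter combinations as a Python sketch, matching the use cases mentioned at the beginning of this section (non-SEO-friendly URLs and slow pages). The task id is a placeholder and the threshold is illustrative; time_total_load is measured in milliseconds (see the fields below).
from client import RestClient

client = RestClient("login", "password")
post_data = [
    #pages with a non-SEO-friendly URL
    dict(task_id=123456789, limit=1000, offset=0,
         filters=[["seo_friendly_url", "=", False]]),
    #pages that take longer than 3 seconds to load
    dict(task_id=123456789, limit=1000, offset=0,
         filters=[["time_total_load", ">", 3000]]),
]
response = client.post("/v2/op_tasks_get_pages_filter", dict(data=post_data))
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    print(response["results"])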
The API server returns an array in the results field where you will find the results.
Field name
Type
Description
status
string
general result
“ok” – successful
“error” – error
if status=“error”, then you can see more detailed information in the error array
error
array
informational array of error only if status=“error”
code
integer
error code
message
string
text description of an error
results_time
string
execution time, seconds
results_count
string
number of elements in the results array
results
array
results array
post_id
string
index in the array received in a POST array
post_site
string
site received in a POST array
task_id
integer
unique task identifier in our system (UInt64)
you will be able to use it to request the results of this task at any time within the next 30 days
string_search_containment
string
string_search_containment received in a POST request
default value: ‘null’.
crawl_max_pages
integer
maximum number of test pages
crawl_start
string
date and time of the start of crawling
in the format year-month-day hours:minutes:seconds +time_zone
for example: ‘2017-12-14 11:50:01 +02:00’
crawl_end
string
date and time of the end of crawling
in the format year-month-day hours:minutes:seconds +time_zone
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning – the value of this field will be ‘null’
status
string
current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
content_count_words
integer
number of words in the content of the page
the text within the body tag is parsed; text inside script, style, a, noscript, select, button, embed, frameset tags, as well as inside comments, is ignored
content_encoding
string
compression algorithm of the page content
date and time of the end of crawling
in the format year-month-day hours:minutes:seconds +time_zone
for example: ‘2017-12-13 15:30:34 +00:00’
crawled
boolean
whether the page has been crawled
deprecated_html_tags
array
array of deprecated html tags of the page
duplicate_meta_tags
array
array of meta tags that are duplicated
favicon
string
favicon of the page
h1_count
integer
count of H1 tags
h2_count
integer
count of H2 tags
h3_count
integer
count of H3 tags
have_deprecated_tags
boolean
presence of deprecated tags on the page
have_doctype
boolean
presence of <!DOCTYPE html> on the page
have_page_duplicates
boolean
presence of duplicate pages of the page
to get these pages you can call op_tasks_get_duplicates with parameter ‘page’
if you request the data during the scanning – the value of this field will be ‘null’
have_enc_meta_tag
boolean
presence of tag <charset> on the page
have_flash
boolean
presence of flash elements on the page
have_frame
boolean
presence of frames on the page
have_lorem_ipsum
boolean
presence of ‘lorem ipsum’ text on the page
have_meta_description_duplicates
boolean
there are pages whose meta tag description duplicates the description of this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘description’
have_recursive_canonical
boolean
presence of recursive canonical
if you request the data during the scanning – the value of this field will be ‘null’
have_redirect
boolean
presence of redirects on the page
if you request the data during the scanning – the value of this field will be ‘null’
have_title_duplicates
boolean
there are pages with duplicate content of tag <title>
to get these pages you can call op_tasks_get_duplicates with parameter ‘title’
images_count
integer
number of images on the page
images_invalid_alt
integer
number of images with an empty or missing alt attribute
images_invalid_title
integer
number of images with an empty or missing title attribute
links_broken
integer
number of broken links from the page
pages with 4xx response code will have 0 value in this field
if you request the data during the scanning – the value of this field will be ‘null’
links_external
integer
number of external links on the page
links_referring
integer
number of referring links to the page
if you request the data during the scanning – the value of this field will be ‘null’
links_internal
integer
number of internal links on the page
meta_description
string
content of meta tag description
meta_description_consistency
float
consistency of meta tag description with page content
from 0 to 1
meta_description_length
integer
length of meta tag description content
meta_keywords
string
content of meta tag keywords
meta_keywords_consistency
float
consistency of meta tag keywords with page content
from 0 to 1
page_allowed
boolean
page access is not disallowed by meta tag robots or X-Robots-Tag HTTP header
page_redirect
string
URL of the page to which the specified page redirects
the field is not empty only if the status code is 3xx
page_size
integer
page size in bytes
plain_text_rate
float
plaintext rate value (plain_text_size / page_size)
plain_text_size
integer
plain text size of the page, in characters
relative_path_length
integer
relative path length of the page URL
response_code
integer
HTTP response code
seo_friendly_url
boolean
page has an ‘SEO-friendly URL’
true if seo_friendly_url_characters_check=true and seo_friendly_url_dynamic_check=true and seo_friendly_url_keywords_check=true and seo_friendly_url_relative_length_check=true
seo_friendly_url_characters_check
boolean
checking for symbols in accordance with Google recommendations
only uppercase and lowercase Latin characters, digits and dashes are allowed
‘true’ – if the test is passed.
seo_friendly_url_dynamic_check
boolean
presence of dynamic parameters for a resource
like ‘https://example.com/some_url.php?adsasd=5’
if there are dynamic parameters in the URL then the status will be ‘false’
seo_friendly_url_keywords_check
boolean
consistency of the page URL with meta tag keywords
if the keywords tag is empty or absent, the URL is compared with the content of the <title> tag. if the title tag is also absent, the test is considered not passed
seo_friendly_url_relative_length_check
boolean
checking the length of the relative path
the URL should not be longer than 120 characters
ssl
boolean
usage of the secure SSL protocol
ssl_handshake_time
integer
time (in milliseconds) spent on the ‘SSL handshake’
string_containment_check
boolean
shows the presence or absence of the text specified in string_search_containment on the page
if no text is specified in the string_search_containment field, the string_containment_check field will have the ‘false’ value
time_connection
integer
time (in milliseconds) spent on establishing the connection
time_download
integer
time (in milliseconds) spent on the loading of resources
time_total_load
integer
total load time: time_connection + time_sending_request + time_waiting + time_download + ssl_handshake_time
time_sending_request
integer
time (in milliseconds) spent on sending a request to a server
time_waiting
integer
time (in milliseconds) spent waiting for the initial server response, also known as Time To First Byte
title
string
content of tag <title>
title_consistency
float
consistency of tag <title> with page content
from 0 to 1
title_length
integer
length of tag <title> content
www
boolean
usage of the subdomain www
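To make the derived fields above concrete: seo_friendly_url is the conjunction of the four seo_friendly_url_*_check flags, and time_total_load is the sum of the five timing fields. A minimal illustrative sketch (the page dict below is a hypothetical page record shaped like one element of the results array, not real API output):
page = {
    "seo_friendly_url_characters_check": True,
    "seo_friendly_url_dynamic_check": True,
    "seo_friendly_url_keywords_check": False,
    "seo_friendly_url_relative_length_check": True,
    "time_connection": 30,
    "time_sending_request": 5,
    "time_waiting": 120,
    "time_download": 210,
    "ssl_handshake_time": 45,
}
seo_friendly_url = all(page[check] for check in (
    "seo_friendly_url_characters_check",
    "seo_friendly_url_dynamic_check",
    "seo_friendly_url_keywords_check",
    "seo_friendly_url_relative_length_check",
))  # False here: the keywords check failed
time_total_load = (page["time_connection"] + page["time_sending_request"]
                   + page["time_waiting"] + page["time_download"]
                   + page["ssl_handshake_time"])  # 410 ms
print(seo_friendly_url, time_total_load)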
Get Broken Pages
Using this function, you can get a list of broken pages (4xx response code). Other pages of the website may still contain links that refer to these non-existent pages.
All results for this function will be available only after the scanning is over (status="crawled"). If a task is still being processed, you will see only the data collected up to the moment of your request.
Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/#api_dashboard
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip
client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_broken_pages/123456789")
if response["status"] == "error":
print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace DataForSeoDemos
{
public static partial class Demos
{
public static async Task op_tasks_get_broken_pages()
{
var httpClient = new HttpClient
{
BaseAddress = new Uri("https://api.dataforseo.com/"),
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
};
var taskid = 123456789;
var response = await httpClient.GetAsync($"v2/op_tasks_get_broken_pages/{taskid}");
var obj = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync());
if (obj.status == "error")
Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
else if (obj.results_count != 0)
{
foreach (var result in obj.results)
{
Console.WriteLine(result);
}
}
else
Console.WriteLine("no results");
}
}
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;
public class Demos {
public static void op_tasks_get_broken_pages() throws JSONException, IOException {
HttpClient client;
client = HttpClientBuilder.create().build();
int taskId = 123456789;
HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_get_broken_pages/" + taskId);
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));
get.setHeader("Content-type", "application/json");
get.setHeader("Authorization", "Basic " + basicAuth);
HttpResponse response = client.execute(get);
JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));
if (obj.get("status").equals("error")) {
System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
} else {
JSONArray results = obj.getJSONArray("results");
if (results.length() > 0) {
for (int i = 0; i < results.length(); i++) {
System.out.println(results.get(i));
}
} else {
System.out.println("no results");
}
}
}
}
The above command returns JSON structured like this:
You will receive an array from the API server in the results field, where you will find the results.
Field name
Type
Description
status
string
general result
“ok” – successful
“error” – error
if status=“error”, then you can see more detailed information in the error array
error
array
informational array of error only if status=“error”
code
integer
error code
message
string
text description of an error
results_time
string
execution time, seconds
results_count
string
number of elements in the results array
results
array
results array
post_id
string
index in the array received in the POST request
post_site
string
site received in the POST request
task_id
integer
unique task identifier in our system (UInt64)
you can use it within 30 days to request the results of this task at any time
string_search_containment
string
string_search_containment received in a POST request
default value: ‘null’.
crawl_max_pages
integer
maximum number of pages to scan
crawl_start
string
date and time of the start of crawling
in the format ‘year-month-day hours:minutes:seconds +GMT_offset’
for example: ‘2017-12-14 11:50:01 +02:00’
crawl_end
string
date and time of the end of crawling
in the format ‘year-month-day hours:minutes:seconds +GMT_offset’
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning – the value of this field will be ‘null’
status
string
current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
number of words in the content of the page
the text inside the <body> tag is parsed; text inside script, style, a, noscript, select, button, embed, frameset tags, as well as comments, is ignored
content_encoding
string
compression algorithm of the content of the page
date and time of the end of crawling
in the format ‘year-month-day hours:minutes:seconds +GMT_offset’
for example: ‘2017-12-13 15:30:34 +00:00’
crawled
boolean
indicates whether the page has been crawled
deprecated_html_tags
array
array of deprecated html tags of the page
duplicate_meta_tags
array
array of meta tags that are duplicated
favicon
string
favicon of the page
h1_count
integer
count of H1 tags
h2_count
integer
count of H2 tags
h3_count
integer
count of H3 tags
have_deprecated_tags
boolean
presence of deprecated tags on the page
have_doctype
boolean
presence of <!DOCTYPE html> on the page
have_page_duplicates
boolean
presence of duplicates of this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘page’
if you request the data during the scanning – the value of this field will be ‘null’
have_enc_meta_tag
boolean
presence of the encoding meta tag (charset) on the page
have_flash
boolean
presence of flash elements on the page
have_frame
boolean
presence of frames on the page
have_lorem_ipsum
boolean
presence of ‘lorem ipsum’ text on the page
have_meta_description_duplicates
boolean
there are pages whose meta tag description duplicates the description of this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘description’
have_recursive_canonical
boolean
presence of recursive canonical
if you request the data during the scanning – the value of this field will be ‘null’
have_redirect
boolean
presence of redirects on the page
if you request the data during the scanning – the value of this field will be ‘null’
have_title_duplicates
boolean
there are pages whose <title> tag content duplicates the title of this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘title’
images_count
integer
number of images on the page
images_invalid_alt
integer
number of images with an empty or missing alt attribute
images_invalid_title
integer
number of images with an empty or missing title attribute
links_broken
integer
number of broken links from the page
pages with 4xx response code will have 0 value in this field
if you request the data during the scanning – the value of this field will be ‘null’
links_external
integer
number of external links on the page
links_referring
integer
number of referring links to the page
if you request the data during the scanning – the value of this field will be ‘null’
links_internal
integer
number of internal links on the page
meta_description
string
content of meta tag description
meta_description_consistency
float
consistency of meta tag description with page content
from 0 to 1
meta_description_length
integer
length of meta tag description content
meta_keywords
string
content of meta tag keywords
meta_keywords_consistency
float
consistency of meta tag keywords with page content
from 0 to 1
page_allowed
boolean
page access is not disallowed by meta tag robots or X-Robots-Tag HTTP header
page_redirect
string
url of page where the specified page is redirected to
the field is not empty only if a status code is 3xx
page_size
integer
page size in bytes
plain_text_rate
float
plaintext rate value (plain_text_size / page_size)
plain_text_size
integer
size of the plain text on the page, in characters
relative_path_length
integer
relative path length of the page URL
response_code
integer
HTTP response code
seo_friendly_url
boolean
page has an ‘SEO-friendly URL’
true if seo_friendly_url_characters_check=true and seo_friendly_url_dynamic_check=true and seo_friendly_url_keywords_check=true and seo_friendly_url_relative_length_check=true
seo_friendly_url_characters_check
boolean
checking for symbols in accordance with Google recommendations
only uppercase and lowercase Latin characters, digits and dashes are allowed
‘true’ – if the test is passed
seo_friendly_url_dynamic_check
boolean
presence of dynamic parameters for a resource
like ‘https://example.com/some_url.php?adsasd=5’
if there are dynamic parameters in the URL then the status will be ‘false’
seo_friendly_url_keywords_check
boolean
consistency of the page URL with meta tag keywords
if the keywords tag is empty or absent, the URL is compared with the content of the <title> tag. if the title tag is also absent, the test is considered not passed
seo_friendly_url_relative_length_check
boolean
checking the length of the relative path
the URL should not be longer than 120 characters
ssl
boolean
usage of the secure SSL protocol
ssl_handshake_time
integer
time (in milliseconds) spent on the ‘SSL handshake’
string_containment_check
boolean
shows the presence or absence of the text specified in string_search_containment on the page
if no text is specified in the string_search_containment field, the string_containment_check field will have the ‘false’ value
time_connection
integer
time (in milliseconds) spent on establishing the connection
time_download
integer
time (in milliseconds) spent on the loading of resources
time_total_load
integer
total load time: time_connection + time_sending_request + time_waiting + time_download + ssl_handshake_time
time_sending_request
integer
time (in milliseconds) spent on sending a request to a server
time_waiting
integer
time (in milliseconds) spent waiting for the initial server response, also known as Time To First Byte
title
string
content of tag <title>
title_consistency
float
consistency of tag <title> with page content
from 0 to 1
title_length
integer
length of tag <title> content
www
boolean
usage of the subdomain www
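A common follow-up to this function is finding out which pages link to a broken page so that the links can be fixed. A minimal sketch, assuming the RestClient from the examples above, a hypothetical task ID, and a broken page address taken from the broken-pages response; op_tasks_get_links_to is documented below:
from client import RestClient
client = RestClient("login", "password")
task_id = 123456789  # hypothetical task ID
broken_page = "'/old/page.html'"  # hypothetical broken page address, in single quotes
response = client.get("/v2/op_tasks_get_links_to/%d/%s" % (task_id, broken_page))
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    print(response["results"])  # referring links pointing at the broken page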
Get Duplicate Pages
Using this function, you can get a list of duplicate pages. Duplicates are grouped by an accumulator, for instance the title, description, or content of a page.
All results for this function will be available only after the scanning is over (status="crawled"). If a task is still being processed, you will see only the data collected up to the moment of your request.
Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/#api_dashboard
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip
client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_duplicates/123456789")
if response["status"] == "error":
print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace DataForSeoDemos
{
public static partial class Demos
{
public static async Task op_tasks_get_duplicates()
{
var httpClient = new HttpClient
{
BaseAddress = new Uri("https://api.dataforseo.com/"),
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
};
var taskid = 123456789;
var response = await httpClient.GetAsync($"v2/op_tasks_get_duplicates/{taskid}");
var obj = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync());
if (obj.status == "error")
Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
else if (obj.results_count != 0)
{
foreach (var result in obj.results)
{
Console.WriteLine(result);
}
}
else
Console.WriteLine("no results");
}
}
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;
public class Demos {
public static void op_tasks_get_duplicates() throws JSONException, IOException {
HttpClient client;
client = HttpClientBuilder.create().build();
int taskId = 123456789;
HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_get_duplicates/" + taskId);
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));
get.setHeader("Content-type", "application/json");
get.setHeader("Authorization", "Basic " + basicAuth);
HttpResponse response = client.execute(get);
JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));
if (obj.get("status").equals("error")) {
System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
} else {
JSONArray results = obj.getJSONArray("results");
if (results.length() > 0) {
for (int i = 0; i < results.length(); i++) {
System.out.println(results.get(i));
}
} else {
System.out.println("no results");
}
}
}
}
The above command returns JSON structured like this:
$duplicate_type – can have the following string values: ‘title’, ‘description’. Default value is ‘title’.
If you want to find duplicated content, you will need to specify the relative URL of the page whose content will be used to run the search. For example: https://api.dataforseo.com/v2/op_tasks_get_duplicates/$task_id/'/your-content-page'
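For illustration, the request variants side by side. This sketch assumes the RestClient from the examples above, a hypothetical task ID, and that $duplicate_type is passed as the final path segment, mirroring the content-page form shown above:
from client import RestClient
client = RestClient("login", "password")
task_id = 123456789  # hypothetical task ID
by_title = client.get("/v2/op_tasks_get_duplicates/%d" % task_id)  # default accumulator: 'title'
by_description = client.get("/v2/op_tasks_get_duplicates/%d/description" % task_id)
# duplicated content: pass the relative URL of the reference page in single quotes
by_content = client.get("/v2/op_tasks_get_duplicates/%d/'/your-content-page'" % task_id)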
You will receive an array from the API server in the results field, where you will find the results.
Field name
Type
Description
status
string
general result
“ok” – successful
“error” – error
if status=“error”, then you can see more detailed information in the error array
error
array
informational array of error only if status=“error”
code
integer
error code
message
string
text description of an error
results_time
string
execution time, seconds
results_count
string
number of elements in the results array
results
array
results array
post_id
string
index in the array received in the POST request
post_site
string
site received in the POST request
task_id
integer
unique task identifier in our system (UInt64)
you can use it within 30 days to request the results of this task at any time
string_search_containment
string
string_search_containment received in a POST request
default value: ‘null’.
crawl_max_pages
integer
maximum number of pages to scan
crawl_start
string
date and time of the start of crawling
in the format ‘year-month-day hours:minutes:seconds +GMT_offset’
for example: ‘2017-12-14 11:50:01 +02:00’
crawl_end
string
date and time of the end of crawling
in the format ‘year-month-day hours:minutes:seconds +GMT_offset’
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning – the value of this field will be ‘null’
status
string
current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
duplicates
array
array of duplicate pages
accumulator
string
the attribute by which the pages were grouped (title, description, or page content)
pages
array
array of duplicate pages with the accumulator
address_full
string
full page address
address_relative
string
relative page address
absent_h1_tags
integer
number of pages without H1 tags
canonical_another
boolean
presence of another canonical page
‘true’ – if page is non-canonical
canonical_page
string
canonical page
canonical_page_recursive
string
recursive canonical page
if you request the data during the scanning – the value of this field will be ‘null’
number of words in the content of the page
the text inside the <body> tag is parsed; text inside script, style, a, noscript, select, button, embed, frameset tags, as well as comments, is ignored
content_encoding
string
compression algorithm of the content of the page
date and time of the end of crawling
in the format ‘year-month-day hours:minutes:seconds +GMT_offset’
for example: ‘2017-12-13 15:30:34 +00:00’
crawled
boolean
indicates whether the page has been crawled
deprecated_html_tags
array
array of deprecated html tags of the page
duplicate_meta_tags
array
array of meta tags that are duplicated
favicon
string
favicon of the page
h1_count
integer
count of H1 tags
h2_count
integer
count of H2 tags
h3_count
integer
count of H3 tags
have_deprecated_tags
boolean
presence of deprecated tags on the page
have_doctype
boolean
presence of <!DOCTYPE html> on the page
have_page_duplicates
boolean
presence of duplicates of this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘page’
if you request the data during the scanning – the value of this field will be ‘null’
have_enc_meta_tag
boolean
presence of the encoding meta tag (charset) on the page
have_flash
boolean
presence of flash elements on the page
have_frame
boolean
presence of frames on the page
have_lorem_ipsum
boolean
presence of ‘lorem ipsum’ text on the page
have_meta_description_duplicates
boolean
there are pages whose meta tag description duplicates the description of this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘description’
have_recursive_canonical
boolean
presence of recursive canonical
if you request the data during the scanning – the value of this field will be ‘null’
have_redirect
boolean
presence of redirects on the page
if you request the data during the scanning – the value of this field will be ‘null’
have_title_duplicates
boolean
there are pages whose <title> tag content duplicates the title of this page
to get these pages you can call op_tasks_get_duplicates with parameter ‘title’
images_count
integer
number of images on the page
images_invalid_alt
integer
number of images with an empty or missing alt attribute
images_invalid_title
integer
number of images with an empty or missing title attribute
links_broken
integer
number of broken links from the page
pages with 4xx response code will have 0 value in this field
if you request the data during the scanning – the value of this field will be ‘null’
links_external
integer
number of external links on the page
links_referring
integer
number of referring links to the page
if you request the data during the scanning – the value of this field will be ‘null’
links_internal
integer
number of internal links on the page
meta_description
string
content of meta tag description
meta_description_consistency
float
consistency of meta tag description with page content
from 0 to 1
meta_description_length
integer
length of meta tag description content
meta_keywords
string
content of meta tag keywords
meta_keywords_consistency
float
consistency of meta tag keywords with page content
from 0 to 1
page_allowed
boolean
page access is not disallowed by meta tag robots or X-Robots-Tag HTTP header
page_redirect
string
url of page where the specified page is redirected to
the field is not empty only if a status code is 3xx
page_size
integer
page size in bytes
plain_text_rate
float
plaintext rate value (plain_text_size / page_size)
plain_text_size
integer
size of the plain text on the page, in characters
relative_path_length
integer
relative path length of the page URL
response_code
integer
HTTP response code
seo_friendly_url
boolean
page has an ‘SEO-friendly URL’
true if seo_friendly_url_characters_check=true and seo_friendly_url_dynamic_check=true and seo_friendly_url_keywords_check=true and seo_friendly_url_relative_length_check=true
seo_friendly_url_characters_check
boolean
checking for symbols in accordance with Google recommendations
only uppercase and lowercase Latin characters, digits and dashes are allowed
‘true’ – if the test is passed.
seo_friendly_url_dynamic_check
boolean
presence of dynamic parameters for a resource
like ‘https://example.com/some_url.php?adsasd=5’
if there are dynamic parameters in the URL then the status will be ‘false’
seo_friendly_url_keywords_check
boolean
consistency of the page URL with meta tag keywords
if the keywords tag is empty or absent, the URL is compared with the content of the <title> tag. if the title tag is also absent, the test is considered not passed
seo_friendly_url_relative_length_check
boolean
checking the length of the relative path
the URL should not be longer than 120 characters
ssl
boolean
usage of the secure SSL protocol
ssl_handshake_time
integer
time (in milliseconds) spent on the ‘SSL handshake’
string_containment_check
boolean
shows the presence or absence of the text specified in string_search_containment on the page
if no text is specified in the string_search_containment field, the string_containment_check field will have the ‘false’ value
time_connection
integer
time (in milliseconds) spent on establishing the connection
time_download
integer
time (in milliseconds) spent on the loading of resources
time_sending_request
integer
time (in milliseconds) spent on sending a request to a server
time_total_load
integer
total load time: time_connection + time_sending_request + time_waiting + time_download + ssl_handshake_time
time_waiting
integer
time (in milliseconds) spent waiting for the initial server response, also known as Time To First Byte
title
string
content of tag <title>
title_consistency
float
consistency of tag <title> with page content
from 0 to 1
title_length
integer
length of tag <title> content
www
boolean
usage of the subdomain www
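A short sketch of walking the response described above: each results element carries a duplicates array whose items hold the shared accumulator value and the pages grouped under it (RestClient and the task ID are the same placeholders as in the examples above):
from client import RestClient
client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_duplicates/123456789")
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    for result in response["results"]:
        for group in result.get("duplicates", []):
            print("accumulator:", group["accumulator"])
            for page in group.get("pages", []):
                print("  ", page["address_relative"])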
Get Links To Page
Using this function, you can get a list of all referring links to a certain page.
All results for this function will be available only after the scanning is over (status="crawled"). If a task is still being processed, you will see only the data collected up to the moment of your request.
Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/#api_dashboard
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip
client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_links_to/123456789/'/relative/page/on/site.html'")
if response["status"] == "error":
print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace DataForSeoDemos
{
public static partial class Demos
{
public static async Task op_tasks_get_links_to()
{
var httpClient = new HttpClient
{
BaseAddress = new Uri("https://api.dataforseo.com/"),
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
};
var taskid = 123456789;
var pageonsite = "'/relative/page/on/site.html'";
var response = await httpClient.GetAsync($"v2/op_tasks_get_links_to/{taskid}/{pageonsite}");
var obj = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync());
if (obj.status == "error")
Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
else if (obj.results_count != 0)
{
foreach (var result in obj.results)
{
Console.WriteLine(result);
}
}
else
Console.WriteLine("no results");
}
}
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;
public class Demos {
public static void op_tasks_get_links_to() throws JSONException, IOException {
HttpClient client;
client = HttpClientBuilder.create().build();
int taskId = 123456789;
String pageonsite = "'/relative/page/on/site.html'";
HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_get_links_to/" + taskId + "/" + pageonsite);
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));
get.setHeader("Content-type", "application/json");
get.setHeader("Authorization", "Basic " + basicAuth);
HttpResponse response = client.execute(get);
JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));
if (obj.get("status").equals("error")) {
System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
} else {
JSONArray results = obj.getJSONArray("results");
if (results.length() > 0) {
for (int i = 0; i < results.length(); i++) {
System.out.println(results.get(i));
}
} else {
System.out.println("no results");
}
}
}
}
The above command returns JSON structured like this:
$page – the relative page address on the site. The page value must be enclosed in single quotes, like this: https://api.dataforseo.com/v2/op_tasks_get_links_to/12345/'/page/on/site.html'
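If the relative page address contains characters that are unsafe in a URL (spaces, non-Latin characters), it should be percent-encoded before being wrapped in single quotes. A sketch with a small helper of ours, not part of the API client, assuming Python 3:
from urllib.parse import quote
from client import RestClient
client = RestClient("login", "password")
def links_to(client, task_id, page):
    # percent-encode the path (keeping slashes), then wrap it in single quotes
    return client.get("/v2/op_tasks_get_links_to/%d/'%s'" % (task_id, quote(page, safe="/")))
response = links_to(client, 123456789, "/relative/page/on/site.html")  # hypothetical task ID
print(response["status"])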
You will receive an array from the API server in the results field, where you will find the results.
Field name
Type
Description
status
string
general result
“ok” – successful
“error” – error
if status=“error”, then you can see more detailed information in the error array
error
array
informational array of error only if status=“error”
code
integer
error code
message
string
text description of an error
results_time
string
execution time, seconds
results_count
string
number of elements in the results array
results
array
results array
post_id
string
index in the array received in the POST request
post_site
string
site received in the POST request
task_id
integer
unique task identifier in our system (UInt64)
you can use it within 30 days to request the results of this task at any time
string_search_containment
string
string_search_containment received in a POST request
default value: ‘null’.
crawl_max_pages
integer
maximum number of pages to scan
crawl_start
string
date and time of the start of crawling
in the format ‘year-month-day hours:minutes:seconds +GMT_offset’
for example: ‘2017-12-14 11:50:01 +02:00’
crawl_end
string
date and time of the end of crawling
in the format ‘year-month-day hours:minutes:seconds +GMT_offset’
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning – the value of this field will be ‘null’
status
string
current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
links_to
array
array of referring links
alt
string
alt attribute of an element
anchor
string
an anchor of a link
link_from
string
full page address of referring page
link_to
string
full page address of requested page
nofollow
boolean
presence of nofollow attribute on the referring page
information about “nofollow”: “nofollow” provides a way for webmasters to tell search engines “don’t follow links on this page” or “don’t follow this specific link.”
page_from
string
relative page address of referring page
page_to
string
relative page address of requested page
relative
boolean
indicates whether the link on the referring page uses a relative URL
ssl_from_use
boolean
ssl used on the referring page
ssl_to_use
boolean
ssl used on the requested page
state
string
current link state
possible values: ‘dead’ or ‘alive’
text_post
string
text after anchor
text_pre
string
text before anchor
type
string
type of link
possible values:
‘href’ – standard link, for instance <a href...>...</a>,
‘image’ – a link where <img> tag was found,
for instance <a href...>...<img>...</img>...</a>,
‘canonical’ – meta canonical link,
‘external’ – external link,
‘http-equiv’ – link from redirect meta http-equiv refresh.
www_from_use
boolean
www. subdomain used on the referring page
www_to_use
boolean
www. subdomain used on the requested page
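As a usage sketch, the state and nofollow fields above can be tallied to see how many referring links are dead or nofollow (hypothetical task ID and page, RestClient as in the examples above):
from client import RestClient
client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_links_to/123456789/'/relative/page/on/site.html'")
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    dead = nofollow = total = 0
    for result in response["results"]:
        for link in result.get("links_to", []):
            total += 1
            dead += link["state"] == "dead"
            nofollow += bool(link["nofollow"])
    print("%d referring links: %d dead, %d nofollow" % (total, dead, nofollow))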
Get Links From Page
Using this function, you can get a list of external and internal links from a certain page.
All results for this function will be available only after the scanning is over (status="crawled"). If a task is still being processed, you will see only the data collected up to the moment of your request.
Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/#api_dashboard
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip
client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_links_from/123456789/'/relative/page/on/site.html'")
if response["status"] == "error":
print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace DataForSeoDemos
{
public static partial class Demos
{
public static async Task op_tasks_get_links_from()
{
var httpClient = new HttpClient
{
BaseAddress = new Uri("https://api.dataforseo.com/"),
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
};
var taskid = 123456789;
var pageonsite = "'/relative/page/on/site.html'";
var response = await httpClient.GetAsync($"v2/op_tasks_get_links_from/{taskid}/{pageonsite}");
var obj = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync());
if (obj.status == "error")
Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
else if (obj.results_count != 0)
{
foreach (var result in obj.results)
{
Console.WriteLine(result);
}
}
else
Console.WriteLine("no results");
}
}
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;
public class Demos {
public static void op_tasks_get_links_from() throws JSONException, IOException {
HttpClient client;
client = HttpClientBuilder.create().build();
int taskId = 123456789;
String pageonsite = "'/relative/page/on/site.html'";
HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_get_links_from/" + taskId + "/" + pageonsite);
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));
get.setHeader("Content-type", "application/json");
get.setHeader("Authorization", "Basic " + basicAuth);
HttpResponse response = client.execute(get);
JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));
if (obj.get("status").equals("error")) {
System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
} else {
JSONArray results = obj.getJSONArray("results");
if (results.length() > 0) {
for (int i = 0; i < results.length(); i++) {
System.out.println(results.get(i));
}
} else {
System.out.println("no results");
}
}
}
}
The above command returns JSON structured like this:
$page – the relative page address on the site. The page value must be enclosed in single quotes, like this: https://api.dataforseo.com/v2/op_tasks_get_links_from/12345/'/page/on/site.html'
You will receive an array from the API server in the results field, where you will find the results.
Field name
Type
Description
status
string
general result
“ok” – successful
“error” – error
if status=“error”, then you can see more detailed information in the error array
error
array
informational array of error only if status=“error”
code
integer
error code
message
string
text description of an error
results_time
string
execution time, seconds
results_count
string
number of elements in the results array
results
array
results array
post_id
string
index in the array received in the POST request
post_site
string
site received in the POST request
task_id
integer
unique task identifier in our system (UInt64)
you can use it within 30 days to request the results of this task at any time
string_search_containment
string
string_search_containment received in a POST request
default value: ‘null’.
crawl_max_pages
integer
maximum number of pages to scan
crawl_start
string
date and time of the start of crawling
in the format ‘year-month-day hours:minutes:seconds +GMT_offset’
for example: ‘2017-12-14 11:50:01 +02:00’
crawl_end
string
date and time of the end of crawling
in the format ‘year-month-day hours:minutes:seconds +GMT_offset’
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning – the value of this field will be ‘null’
status
string
current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
links_from
array
array of links
alt
string
alt attribute of an element
anchor
string
an anchor of a link
link_from
string
full page address of the requested page
link_to
string
full page address of a link from the page
nofollow
boolean
presence of nofollow attribute on the requested page
information about “nofollow”: “nofollow” provides a way for webmasters to tell search engines “don’t follow links on this page” or “don’t follow this specific link.”
page_from
string
relative page address of the requested page
page_to
string
relative page address of a link from the page
relative
boolean
indicates whether the link uses a relative URL
ssl_from_use
boolean
ssl used on the requested page
ssl_to_use
boolean
ssl used on the result page
state
string
current link state
possible values: ‘dead’ or ‘alive’
text_post
string
text after anchor
text_pre
string
text before anchor
type
string
type of link
possible values:
‘href’ – standard link, for instance <a href...>...</a>,
‘image’ – a link where <img> tag was found,
for instance <a href...>...<img>...</img>...</a>,
‘canonical’ – meta canonical link,
‘external’ – external link,
‘http-equiv’ – link from redirect meta http-equiv refresh.
www_from_use
boolean
www. subdomain used on the requested page
www_to_use
boolean
www. subdomain used on the result page
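As a usage sketch, the outgoing links can be bucketed by the type field described above (‘href’, ‘image’, ‘canonical’, ‘external’, ‘http-equiv’); the task ID and page are the same hypothetical placeholders as in the examples above:
from collections import Counter
from client import RestClient
client = RestClient("login", "password")
response = client.get("/v2/op_tasks_get_links_from/123456789/'/relative/page/on/site.html'")
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    counts = Counter()
    for result in response["results"]:
        for link in result.get("links_from", []):
            counts[link["type"]] += 1
    print(dict(counts))  # e.g. {'href': 120, 'image': 14, 'external': 9}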
Get H Tags On Page
Using this function, you can get a list of H tags (H1, H2, H3) on a page.
All results for this function will be available only after the scanning is over (status="crawled"). If a task is still being processed, you will see only the data collected up to the moment of your request.
Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/#api_dashboard
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip
client = RestClient("login", "password")
response = client.get("/v2/op_tasks_htags_on_page/123456789/'/relative/page/on/site.html'")
if response["status"] == "error":
print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
print(response["results"])
using Newtonsoft.Json;
using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace DataForSeoDemos
{
public static partial class Demos
{
public static async Task op_tasks_htags_on_page()
{
var httpClient = new HttpClient
{
BaseAddress = new Uri("https://api.dataforseo.com/"),
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
};
var taskid = 123456789;
var pageonsite = "'/relative/page/on/site.html'";
var response = await httpClient.GetAsync($"v2/op_tasks_htags_on_page/{taskid}/{pageonsite}");
var obj = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync());
if (obj.status == "error")
Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
else
{
foreach (var result in obj.results)
{
Console.WriteLine(result);
}
}
}
}
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;
public class Demos {
public static void op_tasks_htags_on_page() throws JSONException, IOException {
HttpClient client;
client = HttpClientBuilder.create().build();
int taskId = 123456789;
String pageonsite = "'/relative/page/on/site.html'";
HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_htags_on_page/" + taskId + "/" + pageonsite);
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));
get.setHeader("Content-type", "application/json");
get.setHeader("Authorization", "Basic " + basicAuth);
HttpResponse response = client.execute(get);
JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));
if (obj.get("status").equals("error")) {
System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
} else {
JSONArray results = obj.getJSONArray("results");
if (results.length() > 0) {
for (int i = 0; i < results.length(); i++) {
System.out.println(results.get(i));
}
} else {
System.out.println("no results");
}
}
}
}
The above command returns JSON structured like this:
{
"status": "ok",
"results_time": "0.0343 sec.",
"results_count": 9,
"results": [
{
"post_id": "0",
"post_site": "rankactive.com",
"task_id": 136371534,
"string_search_containment": "",
"crawl_max_pages": 20,
"crawl_start": "2018-04-02 19:02:24.659055+03",
"crawl_end": "2018-04-02 19:05:14.710688+03",
"status": "crawled",
"htags_on_page": [
{
"h1": [
"Notification Manager"
],
"h2": [
"Notification manager: video overview",
"How does it help?",
"See it in action",
"Amazing opportunities for your success",
"Ready to try?",
"Customers reviews"
],
"h3": [
"Ready to try?",
"Featured on"
]
}
]
}
]
}
$page – the relative page address on the site. The page value must be enclosed in single quotes, like this: https://api.dataforseo.com/v2/op_tasks_htags_on_page/12345/'/page/on/site.html'
You will receive an array from the API server in the results field, where you will find the results.
Field name
Type
Description
status
string
general result
“ok” – successful
“error” – error
if status=“error”, then you can see more detailed information in the error array
error
array
informational array of error only if status=“error”
code
integer
error code
message
string
text description of an error
results_time
string
execution time, seconds
results_count
string
number of elements in the results array
results
array
results array
post_id
string
index in the array received in the POST request
post_site
string
site received in the POST request
task_id
integer
unique task identifier in our system (UInt64)
you can use it within 30 days to request the results of this task at any time
string_search_containment
string
string_search_containment received in a POST request
default value: ‘null’.
crawl_max_pages
integer
maximum number of pages to scan
crawl_start
string
date and time of the start of crawling
in the format ‘year-month-day hours:minutes:seconds +GMT_offset’
for example: ‘2017-12-14 11:50:01 +02:00’
crawl_end
string
date and time of the end of crawling
in the format ‘year-month-day hours:minutes:seconds +GMT_offset’
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning – the value of this field will be ‘null’
status
string
current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”
htags_on_page
array
array of H tags
h1
array
array of H1 tags
h2
array
array of H2 tags
h3
array
array of H3 tags
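As a usage sketch, the h1 array above makes it easy to flag pages that break the one-H1 convention (hypothetical task ID and page, RestClient as in the examples above):
from client import RestClient
client = RestClient("login", "password")
response = client.get("/v2/op_tasks_htags_on_page/123456789/'/relative/page/on/site.html'")
if response["status"] == "error":
    print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
    for result in response["results"]:
        for htags in result.get("htags_on_page", []):
            h1 = htags.get("h1", [])
            if len(h1) != 1:
                print("expected exactly one H1, found %d: %s" % (len(h1), h1))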
Get Images On Page
Using this function, you can get a list of images on a page.
All results for this function will be available only after the scanning is over (status="crawled"). If a task is still being processed, you will see only the data collected up to the moment of your request.
Instead of ‘login’ and ‘password’ use your credentials from https://my.dataforseo.com/#api_dashboard
from client import RestClient
#You can download this file from here https://api.dataforseo.com/_examples/python/_python_Client.zip
client = RestClient("login", "password")
response = client.get("/v2/op_tasks_images_on_page/123456789/'/relative/page/on/site.html'")
if response["status"] == "error":
print("error. Code: %d Message: %s" % (response["error"]["code"], response["error"]["message"]))
else:
print(response["results"])
using Newtonsoft.Json;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace DataForSeoDemos
{
public static partial class Demos
{
public static async Task op_tasks_images_on_page()
{
var httpClient = new HttpClient
{
BaseAddress = new Uri("https://api.dataforseo.com/"),
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
DefaultRequestHeaders = { Authorization = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(Encoding.ASCII.GetBytes("login:password"))) }
};
var taskid = 123456789;
var pageonsite = "'/relative/page/on/site.html'";
var response = await httpClient.GetAsync($"v2/op_tasks_images_on_page/{taskid}/{pageonsite}");
var obj = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync());
if (obj.status == "error")
Console.WriteLine($"error. Code: {obj.error.code} Message: {obj.error.message}");
else if (obj.results_count != 0)
{
foreach (var result in obj.results)
{
var resultItem = ((IEnumerable)result).Cast<object>().First(); // first element of the result item
Console.WriteLine(resultItem);
}
}
else
Console.WriteLine("no results");
}
}
}
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.*;
public class Demos {
public static void op_tasks_images_on_page() throws JSONException, IOException {
HttpClient client;
client = HttpClientBuilder.create().build();
int taskId = 123456789;
String pageonsite = "'/relative/page/on/site.html'";
HttpGet get = new HttpGet("https://api.dataforseo.com/v2/op_tasks_images_on_page/" + taskId + "/" + pageonsite);
//Instead of 'login' and 'password' use your credentials from https://my.dataforseo.com/#api_dashboard
String basicAuth = Base64.getEncoder().encodeToString(("login:password").getBytes("UTF-8"));
get.setHeader("Content-type", "application/json");
get.setHeader("Authorization", "Basic " + basicAuth);
HttpResponse response = client.execute(get);
JSONObject obj = new JSONObject(EntityUtils.toString(response.getEntity()));
if (obj.get("status").equals("error")) {
System.out.println("error. Code:" + obj.getJSONObject("error").get("code") + " Message: " + obj.getJSONObject("error").get("message"));
} else {
JSONArray results = obj.getJSONArray("results");
if (results.length() > 0) {
for (int i = 0; i < results.length(); i++) {
System.out.println(results.get(i));
}
} else {
System.out.println("no results");
}
}
}
}
The above command returns JSON structured like this:
$page – the relative page address on the site. The page value must be enclosed in single quotes, like this: https://api.dataforseo.com/v2/op_tasks_images_on_page/12345/'/page/on/site.html'
You will receive an array from the API server in the results field, where you will find the results.
Field name
Type
Description
status
string
general result
“ok” – successful
“error” – error
if status=“error”, then you can see more detailed information in the error array
error
array
informational array of error only if status=“error”
code
integer
error code
message
string
text description of an error
results_time
string
execution time, seconds
results_count
string
number of elements in the results array
results
array
results array
post_id
string
index in the array received in the POST request
post_site
string
site received in the POST request
task_id
integer
unique task identifier in our system (UInt64)
you can use it within 30 days to request the results of this task at any time
string_search_containment
string
string_search_containment received in a POST request
default value: ‘null’.
crawl_max_pages
integer
maximum number of pages to scan
crawl_start
string
date and time of the start of crawling
in the format ‘year-month-day hours:minutes:seconds +GMT_offset’
for example: ‘2017-12-14 11:50:01 +02:00’
crawl_end
string
date and time of the end of crawling
in the format ‘year-month-day hours:minutes:seconds +GMT_offset’
for example: ‘2017-12-13 15:30:34 +02:00’
if you request the data during the scanning – the value of this field will be ‘null’
status
string
current status of the task
possible values: “in_queue”, “crawling”, “crawled”, “crawl_paused”