
Output & Response

Customize response

This section covers the parameters you can use to customize the responses returned by our system.

Output Format

Scrape.do supports markdown output for LLM data training or other purposes. Use the output=markdown parameter to receive the response in markdown format when the response content-type is text/html.

curl --location --request GET 'https://api.scrape.do/?token=YOUR_TOKEN&url=https://httpbin.co/&output=markdown'
import requests
import urllib.parse
token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://httpbin.co/")
url = "http://api.scrape.do/?token={}&url={}&output=markdown".format(token, targetUrl)
response = requests.request("GET", url)
print(response.text)
const axios = require('axios');
const token = "YOUR_TOKEN";
const targetUrl = encodeURIComponent("https://httpbin.co/"); 
const config = {
    'method': 'GET',
    'url': `https://api.scrape.do/?token=${token}&url=${targetUrl}&output=markdown`,
    'headers': {}
};
axios(config)
    .then(function (response) {
        console.log(response.data);
    })
    .catch(function (error) {
        console.log(error);
    });
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	token := "YOUR_TOKEN"
	encodedURL := url.QueryEscape("https://httpbin.co/")
	apiURL := fmt.Sprintf("https://api.scrape.do/?token=%s&url=%s&output=markdown", token, encodedURL)
	client := &http.Client{}
	req, err := http.NewRequest("GET", apiURL, nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	res, err := client.Do(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer res.Body.Close()
	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(string(body))
}
require "uri"
require "net/http"
require 'cgi'
str = CGI.escape "https://httpbin.co/"
url = URI("https://api.scrape.do/?url=" + str + "&token=YOUR_TOKEN&output=markdown")
https = Net::HTTP.new(url.host, url.port)
https.use_ssl = true
request = Net::HTTP::Get.new(url)
response = https.request(request)
puts response.read_body
OkHttpClient client = new OkHttpClient().newBuilder()
  .build();
String encoded_url = URLEncoder.encode("https://httpbin.co/", "UTF-8");
Request request = new Request.Builder()
  .url("https://api.scrape.do/?token=YOUR_TOKEN&url=" + encoded_url + "&output=markdown")
  .get()
  .build();
Response response = client.newCall(request).execute();
string token = "YOUR_TOKEN";
string url = WebUtility.UrlEncode("https://httpbin.co/");
var client = new HttpClient();
var requestURL = $"https://api.scrape.do/?token={token}&url={url}&output=markdown";        
var request = new HttpRequestMessage(HttpMethod.Get, requestURL);
var response = client.SendAsync(request).Result;
var content = response.Content.ReadAsStringAsync().Result;
Console.WriteLine(content);
<?php
$curl = curl_init();
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_HEADER, false);
$data = [
   "url" => "https://httpbin.co/",
   "token" => "YOUR_TOKEN",
   "output" => "markdown",
];
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'GET');
curl_setopt($curl, CURLOPT_URL, "https://api.scrape.do/?".http_build_query($data));
curl_setopt($curl, CURLOPT_HTTPHEADER, array(
    "Accept: */*",
));
$response = curl_exec($curl);
curl_close($curl);
echo $response;
?>
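
If you are collecting markdown for LLM training data, you will usually want to persist the output rather than print it. A minimal Python sketch under the same assumptions as the examples above (YOUR_TOKEN and the httpbin.co target; the page.md filename is just an illustration):

import requests
import urllib.parse

token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://httpbin.co/")
url = "https://api.scrape.do/?token={}&url={}&output=markdown".format(token, targetUrl)

response = requests.get(url)
# Write the markdown body to disk, e.g. as one document in a training corpus.
with open("page.md", "w", encoding="utf-8") as f:
    f.write(response.text)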

Downloading Pictures & Files

You don't need any extra configuration to download pictures or files. Just send a request with the target URL and get the result.

There is a 4MB response body limit per request in our system. With super proxies, this limit is 2MB.

curl --location --request GET 'http://api.scrape.do/?token=YOUR_TOKEN&url=https://httpbin.co/image'
import requests
import urllib.parse

token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://httpbin.co/image")
url = "http://api.scrape.do/?token={}&url={}".format(token, targetUrl)
response = requests.request("GET", url)
print(response.text)
const axios = require('axios');
const token = "YOUR_TOKEN";
const targetUrl = encodeURIComponent("https://httpbin.co/image");
const config = {
    'method': 'GET',
    'url': `https://api.scrape.do/?token=${token}&url=${targetUrl}`,
    'headers': {}
};
axios(config)
    .then(function (response) {
        console.log(response.data);
    })
    .catch(function (error) {
        console.log(error);
    });
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	token := "YOUR_TOKEN"
	encodedURL := url.QueryEscape("https://httpbin.co/image")
	apiURL := fmt.Sprintf("https://api.scrape.do/?token=%s&url=%s", token, encodedURL)
	client := &http.Client{}
	req, err := http.NewRequest("GET", apiURL, nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	res, err := client.Do(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer res.Body.Close()
	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(string(body))
}
require "uri"
require "net/http"
require 'cgi'

str = CGI.escape "https://httpbin.co/image"
url = URI("https://api.scrape.do/?url=" + str + "&token=YOUR_TOKEN")
https = Net::HTTP.new(url.host, url.port)
https.use_ssl = true
request = Net::HTTP::Get.new(url)
response = https.request(request)
puts response.read_body
OkHttpClient client = new OkHttpClient().newBuilder()
  .build();
String encoded_url = URLEncoder.encode("https://httpbin.co/image", "UTF-8");
Request request = new Request.Builder()
  .url("https://api.scrape.do/?token=YOUR_TOKEN&url=" + encoded_url)
  .get()
  .build();
Response response = client.newCall(request).execute();
string token = "YOUR_TOKEN";
string url = WebUtility.UrlEncode("https://httpbin.co/image");
var client = new HttpClient();
var requestURL = $"https://api.scrape.do/?token={token}&url={url}";
var request = new HttpRequestMessage(HttpMethod.Get, requestURL);
var response = client.SendAsync(request).Result;
var content = response.Content.ReadAsStringAsync().Result;
Console.WriteLine(content);
<?php
$curl = curl_init();
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_HEADER, false);
$data = [
   "url" => "https://httpbin.co/image",
   "token" => "YOUR_TOKEN"
];
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'GET');
curl_setopt($curl, CURLOPT_URL, "https://api.scrape.do/?".http_build_query($data));
curl_setopt($curl, CURLOPT_HTTPHEADER, array(
    "Accept: */*",
));
$response = curl_exec($curl);
curl_close($curl);
echo $response;
?>
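
Note that image and file responses are binary, so response.text in the examples above is only useful for quick inspection. A minimal Python sketch (same endpoint and parameters as above; the output filename is an illustration) that writes the raw bytes to disk instead:

import requests
import urllib.parse

token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://httpbin.co/image")
url = "https://api.scrape.do/?token={}&url={}".format(token, targetUrl)

response = requests.get(url)
# Binary payloads (images, PDFs, archives) must be read from
# response.content (bytes), not response.text (decoded text).
with open("image.bin", "wb") as f:
    f.write(response.content)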

Transparent Response

By default, Scrape.do returns status codes specified by our system. However, in some cases you may want to receive the exact status code that the target web page returns.

In such cases, simply pass the transparentResponse=true parameter.

curl --location --request GET 'https://api.scrape.do/?token=YOUR_TOKEN&url=https://httpbin.co/anything&transparentResponse=true'
import requests
import urllib.parse
token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://httpbin.co/anything")
transparentResponse = "true"
url = "http://api.scrape.do/?token={}&url={}&transparentResponse={}".format(token, targetUrl,transparentResponse)
response = requests.request("GET", url)
print(response.text)
const axios = require('axios');
const token = "YOUR_TOKEN";
const targetUrl = encodeURIComponent("https://httpbin.co/anything");
const transparentResponse = "true";
const config = {
    'method': 'GET',
    'url': `https://api.scrape.do/?token=${token}&url=${targetUrl}&transparentResponse=${transparentResponse}`,
    'headers': {}
};
axios(config)
    .then(function (response) {
        console.log(response.data);
    })
    .catch(function (error) {
        console.log(error);
    });
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	token := "YOUR_TOKEN"
	encodedURL := url.QueryEscape("https://httpbin.co/anything")
	apiURL := fmt.Sprintf("https://api.scrape.do/?token=%s&url=%s&transparentResponse=true", token, encodedURL)
	client := &http.Client{}
	req, err := http.NewRequest("GET", apiURL, nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	res, err := client.Do(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(string(body))
}
require "uri"
require "net/http"
require 'cgi'
str = CGI.escape "https://httpbin.co/anything"
url = URI("https://api.scrape.do/?url=" + str + "&token=YOUR_TOKEN&transparentResponse=true")
https = Net::HTTP.new(url.host, url.port)
https.use_ssl = true
request = Net::HTTP::Get.new(url)
response = https.request(request)
puts response.read_body
OkHttpClient client = new OkHttpClient().newBuilder()
  .build();
String encoded_url = URLEncoder.encode("https://httpbin.co/anything", "UTF-8");
Request request = new Request.Builder()
  .url("https://api.scrape.do/?token=YOUR_TOKEN&url=" + encoded_url + "&transparentResponse=true")
  .get()
  .build();
Response response = client.newCall(request).execute();
string token = "YOUR_TOKEN";
string url = WebUtility.UrlEncode("https://httpbin.co/anything");
var client = new HttpClient();
var requestURL = $"https://api.scrape.do/?token={token}&url={url}&transparentResponse=true";        
var request = new HttpRequestMessage(HttpMethod.Get, requestURL);
var response = client.SendAsync(request).Result;
var content = response.Content.ReadAsStringAsync().Result;
Console.WriteLine(content);
<?php
$curl = curl_init();

curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_HEADER, false);
$data = [
   "url" => "https://httpbin.co/anything",
   "token" => "YOUR_TOKEN",
   "transparentResponse" => "true"
];
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'GET');
curl_setopt($curl, CURLOPT_URL, "https://api.scrape.do/?".http_build_query($data));
curl_setopt($curl, CURLOPT_HTTPHEADER, array(
    "Accept: */*",
));
$response = curl_exec($curl);
curl_close($curl);
echo $response;
?>
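
The practical difference is the status code your client sees: with transparentResponse=true, an error from the target page reaches you unchanged. A minimal Python sketch (the /status/404 endpoint is an assumption used for illustration; any target returning an error status behaves the same way):

import requests
import urllib.parse

token = "YOUR_TOKEN"
# Hypothetical target that always returns a 404.
targetUrl = urllib.parse.quote("https://httpbin.co/status/404")
url = "https://api.scrape.do/?token={}&url={}&transparentResponse=true".format(token, targetUrl)

response = requests.get(url)
# With transparentResponse=true this prints the target's own status code (404).
print(response.status_code)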

Response Headers

By default, Scrape.do returns all the headers it receives from the target website. To make your work easier, our system also adds the following headers to the response.

Header | Description
Scrape.do-Cookies | All cookie information returned from the target web page, joined with ';' and returned in a single header
Scrape.do-Remaining-Credits | Total credits remaining in your subscription
Scrape.do-Request-Cost | How many credits the request consumed
Scrape.do-Resolved-Url | The final URL after following redirects; useful for tracking where a request ends up
Scrape.do-Target-Url | The original URL of the target web page you requested
Scrape.do-Initial-Status-Code | The first response status code received from the target website; for redirected responses, this shows the initial 30X code
Scrape.do-Target-Redirected-Location | When disableRedirection=true, this shows the Location header from the target website's redirect response
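
These headers arrive on the API response like any other HTTP header, so you can read them directly in your client. A minimal Python sketch using the header names from the table above:

import requests
import urllib.parse

token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://httpbin.co/anything")
url = "https://api.scrape.do/?token={}&url={}".format(token, targetUrl)

response = requests.get(url)
# Informational headers added by Scrape.do; .get() returns None if absent.
print("Remaining credits:", response.headers.get("Scrape.do-Remaining-Credits"))
print("Request cost:", response.headers.get("Scrape.do-Request-Cost"))
print("Resolved URL:", response.headers.get("Scrape.do-Resolved-Url"))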

Pure Cookies

When scraping websites, you may need to process cookies in their original format. By default, Scrape.do returns cookies in a special header called Scrape.do-Cookies. However, if you need to access the original Set-Cookie headers returned by the target website, you can enable the pureCookies parameter.

When set to pureCookies=true, this parameter returns the original Set-Cookie headers from the target website instead of the processed Scrape.do-Cookies format.

curl --location --request GET 'https://api.scrape.do/?token=YOUR_TOKEN&url=https%3A%2F%2Fwww.instagram.com%2Fapi%2Fv1%2Fusers%2Fweb_profile_info%2F%3Fusername%3Dgoogle&pureCookies=true'
import requests
import urllib.parse

token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://www.instagram.com/api/v1/users/web_profile_info/?username=google")
url = "http://api.scrape.do/?token={}&url={}&pureCookies=true".format(token, targetUrl)
response = requests.request("GET", url)
for key, value in response.headers.items():
    print(f"{key}: {value}")
const axios = require('axios');
const token = "YOUR_TOKEN";
const targetUrl = encodeURIComponent("https://www.instagram.com/api/v1/users/web_profile_info/?username=google");
const config = {
    'method': 'GET',
    'url': `https://api.scrape.do/?token=${token}&url=${targetUrl}&pureCookies=true`,
    'headers': {}
};
axios(config)
    .then(function (response) {
        console.log("HEADERS:", JSON.stringify(response.headers, null, 2));
    })
    .catch(function (error) {
        console.log(error);
    });
package main

import (
	"fmt"
	"net/http"
	"net/url"
)

func main() {
	token := "YOUR_TOKEN"
	encodedURL := url.QueryEscape("https://www.instagram.com/api/v1/users/web_profile_info/?username=google")
	apiURL := fmt.Sprintf("https://api.scrape.do/?token=%s&url=%s&pureCookies=true", token, encodedURL)
	client := &http.Client{}
	req, err := http.NewRequest("GET", apiURL, nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	res, err := client.Do(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer res.Body.Close()
	fmt.Println("HEADERS:")
	for key, values := range res.Header {
		for _, value := range values {
			fmt.Printf("%s: %s\n", key, value)
		}
	}
}
require "uri"
require "net/http"
require 'cgi'

str = CGI.escape "https://www.instagram.com/api/v1/users/web_profile_info/?username=google"
url = URI("https://api.scrape.do/?url=" + str + "&token=YOUR_TOKEN&pureCookies=true")
https = Net::HTTP.new(url.host, url.port)
https.use_ssl = true
request = Net::HTTP::Get.new(url)
response = https.request(request)
puts "HEADERS:"
response.each_header do |key, value|
  puts "#{key}: #{value}"
end
OkHttpClient client = new OkHttpClient().newBuilder()
  .build();
String encoded_url = URLEncoder.encode("https://www.instagram.com/api/v1/users/web_profile_info/?username=google", "UTF-8");
Request request = new Request.Builder()
  .url("https://api.scrape.do/?token=YOUR_TOKEN&url=" + encoded_url + "&pureCookies=true")
  .get()
  .build();
Response response = client.newCall(request).execute();

System.out.println("Status Code: " + response.code());
System.out.println("\nHEADERS:");
Headers headers = response.headers();
for (int i = 0; i < headers.size(); i++) {
    System.out.println(headers.name(i) + ": " + headers.value(i));
}
string token = "YOUR_TOKEN";
string url = WebUtility.UrlEncode("https://www.instagram.com/api/v1/users/web_profile_info/?username=google");
var client = new HttpClient();
var requestURL = $"https://api.scrape.do/?token={token}&url={url}&pureCookies=true";        
var request = new HttpRequestMessage(HttpMethod.Get, requestURL);
var response = client.SendAsync(request).Result;
Console.WriteLine($"Status Code: {(int)response.StatusCode} {response.StatusCode}");
Console.WriteLine("\nHEADERS:");
foreach (var header in response.Headers)
{
    Console.WriteLine($"{header.Key}: {string.Join(", ", header.Value)}");
}
<?php
$curl = curl_init();
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_HEADER, true);
$data = [
   "url" => "https://www.instagram.com/api/v1/users/web_profile_info/?username=google",
   "token" => "YOUR_TOKEN",
   "pureCookies" => "true",
];
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'GET');
curl_setopt($curl, CURLOPT_URL, "https://api.scrape.do/?".http_build_query($data));
curl_setopt($curl, CURLOPT_HTTPHEADER, array(
    "Accept: */*",
));
$response = curl_exec($curl);
$header_size = curl_getinfo($curl, CURLINFO_HEADER_SIZE);
$header = substr($response, 0, $header_size);
curl_close($curl);
echo "HEADERS:\n";
$headers = explode("\r\n", $header);
foreach($headers as $h) {
    if(trim($h) !== '') {
        echo $h . "\n";
    }
}
?>
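
Because a site can send several Set-Cookie headers, high-level clients often fold them into a single comma-joined value. A minimal Python sketch that reads each Set-Cookie header individually via the underlying urllib3 header dict (response.raw.headers and its getlist method are standard requests/urllib3 behavior, not Scrape.do-specific):

import requests
import urllib.parse

token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://www.instagram.com/api/v1/users/web_profile_info/?username=google")
url = "https://api.scrape.do/?token={}&url={}&pureCookies=true".format(token, targetUrl)

response = requests.get(url)
# requests merges repeated headers into one value; the urllib3 header dict
# underneath keeps every Set-Cookie header as a separate entry.
for cookie in response.raw.headers.getlist("Set-Cookie"):
    print(cookie)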

Example Result

[Image: Pure Cookies Response]