
Output & Response

Customize response

This section covers the parameters you can use to customize the responses returned by our system.

Output Format

Scrape.do supports markdown output for LLM data training or other purposes. Use the output=markdown parameter to receive the response in markdown format when the response content-type is text/html.

curl --location --request GET 'https://api.scrape.do/?token=YOUR_TOKEN&url=https://httpbin.co/&output=markdown'
import requests
import urllib.parse
token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://httpbin.co/")
url = "http://api.scrape.do/?token={}&url={}&output=markdown".format(token, targetUrl)
response = requests.request("GET", url)
print(response.text)
const axios = require('axios');
const token = "YOUR_TOKEN";
const targetUrl = encodeURIComponent("https://httpbin.co/"); 
const config = {
    'method': 'GET',
    'url': `https://api.scrape.do/?token=${token}&url=${targetUrl}&output=markdown`,
    'headers': {}
};
axios(config)
    .then(function (response) {
        console.log(response.data);
    })
    .catch(function (error) {
        console.log(error);
    });
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	token := "YOUR_TOKEN"
	encodedURL := url.QueryEscape("https://httpbin.co/")
	apiURL := fmt.Sprintf("https://api.scrape.do/?token=%s&url=%s&output=markdown", token, encodedURL)
	client := &http.Client{}
	req, err := http.NewRequest("GET", apiURL, nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	res, err := client.Do(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer res.Body.Close()
	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(string(body))
}
require "uri"
require "net/http"
require 'cgi'
str = CGI.escape "https://httpbin.co/"
url = URI("https://api.scrape.do/?url=" + str + "&token=YOUR_TOKEN&output=markdown")
https = Net::HTTP.new(url.host, url.port)
https.use_ssl = true
request = Net::HTTP::Get.new(url)
response = https.request(request)
puts response.read_body
OkHttpClient client = new OkHttpClient().newBuilder()
  .build();
String encoded_url = URLEncoder.encode("https://httpbin.co/", "UTF-8");
Request request = new Request.Builder()
  .url("https://api.scrape.do/?token=YOUR_TOKEN&url=" + encoded_url + "&output=markdown")
  .get()
  .build();
Response response = client.newCall(request).execute();
string token = "YOUR_TOKEN";
string url = WebUtility.UrlEncode("https://httpbin.co/");
var client = new HttpClient();
var requestURL = $"https://api.scrape.do/?token={token}&url={url}&output=markdown";        
var request = new HttpRequestMessage(HttpMethod.Get, requestURL);
var response = client.SendAsync(request).Result;
var content = response.Content.ReadAsStringAsync().Result;
Console.WriteLine(content);
<?php
$curl = curl_init();
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_HEADER, false);
$data = [
   "url" => "https://httpbin.co/",
   "token" => "YOUR_TOKEN",
   "output" => "markdown",
];
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'GET');
curl_setopt($curl, CURLOPT_URL, "https://api.scrape.do/?".http_build_query($data));
curl_setopt($curl, CURLOPT_HTTPHEADER, array(
    "Accept: */*",
));
$response = curl_exec($curl);
curl_close($curl);
echo $response;
?>
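
If you are collecting markdown for LLM training data, you will usually want to persist the output rather than print it. A minimal Python sketch under the same assumptions as the examples above (YOUR_TOKEN and the httpbin.co target; the page.md filename is just an illustration):

import requests
import urllib.parse

token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://httpbin.co/")
url = "https://api.scrape.do/?token={}&url={}&output=markdown".format(token, targetUrl)

response = requests.get(url)
# Write the markdown body to disk, e.g. as one document in a training corpus.
with open("page.md", "w", encoding="utf-8") as f:
    f.write(response.text)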

Downloading Pictures & Files

You don't need any extra configuration to download pictures or files. Just send a request with the target URL and get the result.

There is a 4MB response body limit per request in our system. With super proxies, this limit is 2MB.

curl --location --request GET 'http://api.scrape.do/?token=YOUR_TOKEN&url=https://httpbin.co/image'
import requests
import urllib.parse

token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://httpbin.co/image")
url = "http://api.scrape.do/?token={}&url={}".format(token, targetUrl)
response = requests.request("GET", url)
print(response.text)
const axios = require('axios');
const token = "YOUR_TOKEN";
const targetUrl = encodeURIComponent("https://httpbin.co/image");
const config = {
    'method': 'GET',
    'url': `https://api.scrape.do/?token=${token}&url=${targetUrl}`,
    'headers': {}
};
axios(config)
    .then(function (response) {
        console.log(response.data);
    })
    .catch(function (error) {
        console.log(error);
    });
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	token := "YOUR_TOKEN"
	encodedURL := url.QueryEscape("https://httpbin.co/image")
	apiURL := fmt.Sprintf("https://api.scrape.do/?token=%s&url=%s", token, encodedURL)
	client := &http.Client{}
	req, err := http.NewRequest("GET", apiURL, nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	res, err := client.Do(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer res.Body.Close()
	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(string(body))
}
require "uri"
require "net/http"
require 'cgi'

str = CGI.escape "https://httpbin.co/image"
url = URI("https://api.scrape.do/?url=" + str + "&token=YOUR_TOKEN")
https = Net::HTTP.new(url.host, url.port)
https.use_ssl = true
request = Net::HTTP::Get.new(url)
response = https.request(request)
puts response.read_body
OkHttpClient client = new OkHttpClient().newBuilder()
  .build();
String encoded_url = URLEncoder.encode("https://httpbin.co/image", "UTF-8");
Request request = new Request.Builder()
  .url("https://api.scrape.do/?token=YOUR_TOKEN&url=" + encoded_url)
  .get()
  .build();
Response response = client.newCall(request).execute();
string token = "YOUR_TOKEN";
string url = WebUtility.UrlEncode("https://httpbin.co/image");
var client = new HttpClient();
var requestURL = $"https://api.scrape.do/?token={token}&url={url}";
var request = new HttpRequestMessage(HttpMethod.Get, requestURL);
var response = client.SendAsync(request).Result;
var content = response.Content.ReadAsStringAsync().Result;
Console.WriteLine(content);
<?php
$curl = curl_init();
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_HEADER, false);
$data = [
   "url" => "https://httpbin.co/image",
   "token" => "YOUR_TOKEN"
];
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'GET');
curl_setopt($curl, CURLOPT_URL, "https://api.scrape.do/?".http_build_query($data));
curl_setopt($curl, CURLOPT_HTTPHEADER, array(
    "Accept: */*",
));
$response = curl_exec($curl);
curl_close($curl);
echo $response;
?>
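
Note that image and file responses are binary, so response.text in the examples above is only useful for quick inspection. A minimal Python sketch (same endpoint and parameters as above; the output filename is an illustration) that writes the raw bytes to disk instead:

import requests
import urllib.parse

token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://httpbin.co/image")
url = "https://api.scrape.do/?token={}&url={}".format(token, targetUrl)

response = requests.get(url)
# Binary payloads (images, PDFs, archives) must be read from
# response.content (bytes), not response.text (decoded text).
with open("image.bin", "wb") as f:
    f.write(response.content)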

Transparent Response

By default, Scrape.do returns status codes specified by our system. However, in some cases you may want to receive the exact status code that the target web page returns.

In such cases, simply pass the transparentResponse=true parameter.

curl --location --request GET 'https://api.scrape.do/?token=YOUR_TOKEN&url=https://httpbin.co/anything&transparentResponse=true'
import requests
import urllib.parse
token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://httpbin.co/anything")
transparentResponse = "true"
url = "http://api.scrape.do/?token={}&url={}&transparentResponse={}".format(token, targetUrl,transparentResponse)
response = requests.request("GET", url)
print(response.text)
const axios = require('axios');
const token = "YOUR_TOKEN";
const targetUrl = encodeURIComponent("https://httpbin.co/anything");
const transparentResponse = "true";
const config = {
    'method': 'GET',
    'url': `https://api.scrape.do/?token=${token}&url=${targetUrl}&transparentResponse=${transparentResponse}`,
    'headers': {}
};
axios(config)
    .then(function (response) {
        console.log(response.data);
    })
    .catch(function (error) {
        console.log(error);
    });
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	token := "YOUR_TOKEN"
	encodedURL := url.QueryEscape("https://httpbin.co/anything")
	apiURL := fmt.Sprintf("https://api.scrape.do/?token=%s&url=%s&transparentResponse=true", token, encodedURL)
	client := &http.Client{}
	req, err := http.NewRequest("GET", apiURL, nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	res, err := client.Do(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(string(body))
}
require "uri"
require "net/http"
require 'cgi'
str = CGI.escape "https://httpbin.co/anything"
url = URI("https://api.scrape.do/?url=" + str + "&token=YOUR_TOKEN&transparentResponse=true")
https = Net::HTTP.new(url.host, url.port)
https.use_ssl = true
request = Net::HTTP::Get.new(url)
response = https.request(request)
puts response.read_body
OkHttpClient client = new OkHttpClient().newBuilder()
  .build();
String encoded_url = URLEncoder.encode("https://httpbin.co/anything", "UTF-8");
Request request = new Request.Builder()
  .url("https://api.scrape.do/?token=YOUR_TOKEN&url=" + encoded_url + "&transparentResponse=true")
  .get()
  .build();
Response response = client.newCall(request).execute();
string token = "YOUR_TOKEN";
string url = WebUtility.UrlEncode("https://httpbin.co/anything");
var client = new HttpClient();
var requestURL = $"https://api.scrape.do/?token={token}&url={url}&transparentResponse=true";        
var request = new HttpRequestMessage(HttpMethod.Get, requestURL);
var response = client.SendAsync(request).Result;
var content = response.Content.ReadAsStringAsync().Result;
Console.WriteLine(content);
<?php
$curl = curl_init();

curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_HEADER, false);
$data = [
   "url" => "https://httpbin.co/anything",
   "token" => "YOUR_TOKEN",
   "transparentResponse" => "true"
];
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'GET');
curl_setopt($curl, CURLOPT_URL, "https://api.scrape.do/?".http_build_query($data));
curl_setopt($curl, CURLOPT_HTTPHEADER, array(
    "Accept: */*",
));
$response = curl_exec($curl);
curl_close($curl);
echo $response;
?>
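
The practical difference is the status code your client sees: with transparentResponse=true, an error from the target page reaches you unchanged. A minimal Python sketch (the /status/404 endpoint is an assumption used for illustration; any target returning an error status behaves the same way):

import requests
import urllib.parse

token = "YOUR_TOKEN"
# Hypothetical target that always returns a 404.
targetUrl = urllib.parse.quote("https://httpbin.co/status/404")
url = "https://api.scrape.do/?token={}&url={}&transparentResponse=true".format(token, targetUrl)

response = requests.get(url)
# With transparentResponse=true this prints the target's own status code (404).
print(response.status_code)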

Response Headers

By default, Scrape.do returns all the headers it receives from the target website. To make your work easier, our system also adds the following headers to the response.

Header | Description
Scrape.do-Cookies | All cookie information returned from the target web page, joined with ';' and returned in a single header
Scrape.do-Remaining-Credits | Total credits remaining in your subscription
Scrape.do-Request-Cost | How many credits the request consumed
Scrape.do-Resolved-Url | The final URL after following redirects; useful for tracking where a request ends up
Scrape.do-Target-Url | The original URL of the target web page you requested
Scrape.do-Initial-Status-Code | The first response status code received from the target website; for redirected responses, this shows the initial 30X code
Scrape.do-Target-Redirected-Location | When disableRedirection=true, this shows the Location header from the target website's redirect response
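
These headers arrive on the API response like any other HTTP header, so you can read them directly in your client. A minimal Python sketch using the header names from the table above:

import requests
import urllib.parse

token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://httpbin.co/anything")
url = "https://api.scrape.do/?token={}&url={}".format(token, targetUrl)

response = requests.get(url)
# Informational headers added by Scrape.do; .get() returns None if absent.
print("Remaining credits:", response.headers.get("Scrape.do-Remaining-Credits"))
print("Request cost:", response.headers.get("Scrape.do-Request-Cost"))
print("Resolved URL:", response.headers.get("Scrape.do-Resolved-Url"))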

Pure Cookies

When scraping websites, you may need to process cookies in their original format. By default, Scrape.do returns cookies in a special header called Scrape.do-Cookies. However, if you need to access the original Set-Cookie headers returned by the target website, you can enable the pureCookies parameter.

When set to pureCookies=true, this parameter returns the original Set-Cookie headers from the target website instead of the processed Scrape.do-Cookies format.

curl --location --request GET 'https://api.scrape.do/?token=YOUR_TOKEN&url=https%3A%2F%2Fwww.instagram.com%2Fapi%2Fv1%2Fusers%2Fweb_profile_info%2F%3Fusername%3Dgoogle&pureCookies=true'
import requests
import urllib.parse

token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://www.instagram.com/api/v1/users/web_profile_info/?username=google")
url = "http://api.scrape.do/?token={}&url={}&pureCookies=true".format(token, targetUrl)
response = requests.request("GET", url)
for key, value in response.headers.items():
    print(f"{key}: {value}")
const axios = require('axios');
const token = "YOUR_TOKEN";
const targetUrl = encodeURIComponent("https://www.instagram.com/api/v1/users/web_profile_info/?username=google");
const config = {
    'method': 'GET',
    'url': `https://api.scrape.do/?token=${token}&url=${targetUrl}&pureCookies=true`,
    'headers': {}
};
axios(config)
    .then(function (response) {
        console.log("HEADERS:", JSON.stringify(response.headers, null, 2));
    })
    .catch(function (error) {
        console.log(error);
    });
package main

import (
	"fmt"
	"net/http"
	"net/url"
)

func main() {
	token := "YOUR_TOKEN"
	encodedURL := url.QueryEscape("https://www.instagram.com/api/v1/users/web_profile_info/?username=google")
	apiURL := fmt.Sprintf("https://api.scrape.do/?token=%s&url=%s&pureCookies=true", token, encodedURL)
	client := &http.Client{}
	req, err := http.NewRequest("GET", apiURL, nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	res, err := client.Do(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer res.Body.Close()
	fmt.Println("HEADERS:")
	for key, values := range res.Header {
		for _, value := range values {
			fmt.Printf("%s: %s\n", key, value)
		}
	}
}
require "uri"
require "net/http"
require 'cgi'

str = CGI.escape "https://www.instagram.com/api/v1/users/web_profile_info/?username=google"
url = URI("https://api.scrape.do/?url=" + str + "&token=YOUR_TOKEN&pureCookies=true")
https = Net::HTTP.new(url.host, url.port)
https.use_ssl = true
request = Net::HTTP::Get.new(url)
response = https.request(request)
puts "HEADERS:"
response.each_header do |key, value|
  puts "#{key}: #{value}"
end
OkHttpClient client = new OkHttpClient().newBuilder()
  .build();
String encoded_url = URLEncoder.encode("https://www.instagram.com/api/v1/users/web_profile_info/?username=google", "UTF-8");
Request request = new Request.Builder()
  .url("https://api.scrape.do/?token=YOUR_TOKEN&url=" + encoded_url + "&pureCookies=true")
  .get()
  .build();
Response response = client.newCall(request).execute();

System.out.println("Status Code: " + response.code());
System.out.println("\nHEADERS:");
Headers headers = response.headers();
for (int i = 0; i < headers.size(); i++) {
    System.out.println(headers.name(i) + ": " + headers.value(i));
}
string token = "YOUR_TOKEN";
string url = WebUtility.UrlEncode("https://www.instagram.com/api/v1/users/web_profile_info/?username=google");
var client = new HttpClient();
var requestURL = $"https://api.scrape.do/?token={token}&url={url}&pureCookies=true";        
var request = new HttpRequestMessage(HttpMethod.Get, requestURL);
var response = client.SendAsync(request).Result;
Console.WriteLine($"Status Code: {(int)response.StatusCode} {response.StatusCode}");
Console.WriteLine("\nHEADERS:");
foreach (var header in response.Headers)
{
    Console.WriteLine($"{header.Key}: {string.Join(", ", header.Value)}");
}
<?php
$curl = curl_init();
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_HEADER, true);
$data = [
   "url" => "https://www.instagram.com/api/v1/users/web_profile_info/?username=google",
   "token" => "YOUR_TOKEN",
   "pureCookies" => "true",
];
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'GET');
curl_setopt($curl, CURLOPT_URL, "https://api.scrape.do/?".http_build_query($data));
curl_setopt($curl, CURLOPT_HTTPHEADER, array(
    "Accept: */*",
));
$response = curl_exec($curl);
$header_size = curl_getinfo($curl, CURLINFO_HEADER_SIZE);
$header = substr($response, 0, $header_size);
curl_close($curl);
echo "HEADERS:\n";
$headers = explode("\r\n", $header);
foreach($headers as $h) {
    if(trim($h) !== '') {
        echo $h . "\n";
    }
}
?>
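
Because a site can send several Set-Cookie headers, high-level clients often fold them into a single comma-joined value. A minimal Python sketch that reads each Set-Cookie header individually via the underlying urllib3 header dict (response.raw.headers and its getlist method are standard requests/urllib3 behavior, not Scrape.do-specific):

import requests
import urllib.parse

token = "YOUR_TOKEN"
targetUrl = urllib.parse.quote("https://www.instagram.com/api/v1/users/web_profile_info/?username=google")
url = "https://api.scrape.do/?token={}&url={}&pureCookies=true".format(token, targetUrl)

response = requests.get(url)
# requests merges repeated headers into one value; the urllib3 header dict
# underneath keeps every Set-Cookie header as a separate entry.
for cookie in response.raw.headers.getlist("Set-Cookie"):
    print(cookie)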

Example Result

[Image: Pure Cookies Response]