Rendering JavaScript

If the page you are crawling requires JavaScript to be rendered before the data you need appears, we can fetch it for you using a headless browser.

To render Javascript, simply set render=true and we will use a headless Google Chrome instance to fetch the page. This feature is available on all plans.

Pass the JavaScript rendering parameter within the URL:

  • API REQUEST

# Fetch a page through ScraperAPI with JavaScript rendering enabled
# by passing render=true as a query parameter.
import requests

params = {
    'api_key': 'APIKEY',
    'url': 'https://httpbin.org/ip',
    'render': 'true',
}
response = requests.get('https://api.scraperapi.com', params=params)
print(response.text)

# Scrapy users can simply replace the urls in their start_urls and parse function
# ...other scrapy setup code
start_urls = ['https://api.scraperapi.com?api_key=APIKEY&url=' + url + '&render=true']

def parse(self, response):
    # ...your parsing logic here
    # NOTE: the body of parse() must be indented — the original snippet
    # placed these lines at column 0, which raises IndentationError.
    yield scrapy.Request('https://api.scraperapi.com/?api_key=APIKEY&url=' + url + '&render=true', self.parse)
  • PROXY MODE

# Route the request through ScraperAPI's proxy port; the render flag is
# embedded in the proxy username.
import requests

render_proxy = "http://scraperapi.render=true:<YOUR_API_KEY>@proxy-server.scraperapi.com:8001"
proxies = {"http": render_proxy, "https": render_proxy}
response = requests.get('https://httpbin.org/ip', proxies=proxies, verify=False)
print(response.text)

# Scrapy users can likewise simply pass their API key in headers.
# NB: Scrapy skips SSL verification by default.
# ...other scrapy setup code
start_urls = ['http://httpbin.org/ip']
meta = {
  "proxy": "http://scraperapi.render=true:APIKEY@proxy-server.scraperapi.com:8001"
}

def parse(self, response):
    # ...your parsing logic here
    # NOTE: the body of parse() must be indented — the original snippet
    # placed these lines at column 0, which raises IndentationError.
    yield scrapy.Request(url, callback=self.parse, headers=headers, meta=meta)
  • SDK Method

from scraperapi_sdk import ScraperAPIClient

client = ScraperAPIClient('APIKEY')
# The render flag must be the Python boolean True — the bare name `true`
# (as in the original snippet) is undefined and raises NameError.
result = client.get(url='https://httpbin.org/ip', render=True).text
print(result)

# Scrapy users can simply replace the urls in their start_urls and parse function
# Note for Scrapy, you should not use DOWNLOAD_DELAY and
# RANDOMIZE_DOWNLOAD_DELAY, these will lower your concurrency and are not
# needed with our API

# ...other scrapy setup code
# NOTE: render must be the Python boolean True, not the bare name `true`
# (which is undefined and raises NameError).
start_urls = [client.scrapyGet(url='https://httpbin.org/ip', render=True)]

def parse(self, response):
    # ...your parsing logic here
    # NOTE: the body of parse() must be indented, unlike the original snippet.
    yield scrapy.Request(client.scrapyGet(url='https://httpbin.org/ip', render=True), self.parse)

Pass the parameter in the headers:

  • API REQUEST

# Enable rendering by sending the x-sapi-render header (with the API key
# in x-sapi-api_key) instead of query-string parameters.
import requests

sapi_headers = {
    'x-sapi-render': 'true',
    'x-sapi-api_key': '<YOUR_API_KEY>',
}
response = requests.get(
    'https://api.scraperapi.com',
    params={'url': 'https://httpbin.org/ip'},
    headers=sapi_headers,
)
print(response.text)

# For Scrapy users: Update the headers to include 'x-sapi-render' and 'x-sapi-api_key'
# ...other scrapy setup code
def start_requests(self):
    # NOTE: function bodies must be indented — the original snippet placed
    # these lines at column 0, which raises IndentationError.
    headers = {
        'x-sapi-render': 'true',
        'x-sapi-api_key': api_key
    }
    yield scrapy.Request('https://api.scraperapi.com', headers=headers, callback=self.parse)

def parse(self, response):
    # ...your parsing logic here
    pass  # a function body needs at least one statement
  • PROXY MODE

# Combine proxy-mode authentication with the x-sapi-render header.
import requests

sapi_proxy = "http://scraperapi:<YOUR_API_KEY>@proxy-server.scraperapi.com:8001"
proxies = {"http": sapi_proxy, "https": sapi_proxy}

render_headers = {'x-sapi-render': 'true'}

response = requests.get(
    'https://httpbin.org/ip',
    proxies=proxies,
    headers=render_headers,
    verify=False,
)
print(response.text)

# Scrapy users can likewise simply pass the parameters in the headers.
# NB: Scrapy skips SSL verification by default.

start_urls = ['http://httpbin.org/ip']
proxy_url = "http://scraperapi:<YOUR_API_KEY>@proxy-server.scraperapi.com:8001"
headers = {
    'x-sapi-render': 'true',
}
# Scrapy routes a request through a proxy via the request's meta dict;
# scrapy.Request() does NOT accept a requests-style `proxies` keyword
# argument (the original snippet would raise TypeError).
meta = {"proxy": proxy_url}

def parse(self, response):
    # ...your parsing logic here
    # NOTE: the body of parse() must be indented, unlike the original snippet.
    yield scrapy.Request(url, callback=self.parse, headers=headers, meta=meta)
  • SDK METHOD

from scraperapi_sdk import ScraperAPIClient

# Enable rendering by passing the x-sapi-render header on the SDK call.
client = ScraperAPIClient('<YOUR_API_KEY>')
render_headers = {'x-sapi-render': 'true'}
result = client.get(url='https://httpbin.org/ip', headers=render_headers)
print(result)

# Scrapy users can likewise simply pass the parameters in the headers.
# Note for Scrapy, you should not use DOWNLOAD_DELAY and
# RANDOMIZE_DOWNLOAD_DELAY, these will lower your concurrency and are not
# needed with our API

# ...other scrapy setup code
def start_requests(self):
    # NOTE: function bodies must be indented — the original snippet placed
    # these lines at column 0, which raises IndentationError.
    headers = {
        'x-sapi-render': 'true',
        'x-sapi-api_key': api_key
    }
    yield scrapy.Request('https://api.scraperapi.com', headers=headers, callback=self.parse)

def parse(self, response):
    # ...your parsing logic here
    pass  # a function body needs at least one statement

Last updated