For select websites, the API will parse all the valuable data in the HTML response and return it in JSON format. To enable this feature, simply add autoparse=true to your request and the API will parse the data for you. Currently, this feature works with Amazon, Google Search, Google Shopping, and Walmart.
API REQUEST
from urllib.parse import urlencode

import requests

# Plain `requests` usage: adding autoparse=true to the query parameters asks
# the API to return the parsed data as JSON. `params=` lets requests do the
# URL-encoding of the target URL.
payload = {
    'api_key': 'APIKEY',
    'autoparse': 'true',
    'url': 'https://www.amazon.com/dp/B07V1PHM66',
}
r = requests.get('https://api.scraperapi.com', params=payload)
print(r.text)

# Scrapy users can simply replace the urls in their start_urls and parse function
# ...other scrapy setup code
url = 'https://www.amazon.com/dp/B07V1PHM66'
# urlencode() percent-encodes the target URL and joins every parameter with
# '&'. Plain string concatenation would glue 'autoparse=true' onto the end of
# the target URL instead of passing it as its own parameter.
start_urls = [
    'https://api.scraperapi.com/?'
    + urlencode({'api_key': 'APIKEY', 'url': url, 'autoparse': 'true'})
]


def parse(self, response):
    # ...your parsing logic here
    # Replace `url` with whichever page your parsing logic wants to follow next.
    yield scrapy.Request(
        'https://api.scraperapi.com/?'
        + urlencode({'api_key': 'APIKEY', 'url': url, 'autoparse': 'true'}),
        self.parse,
    )
PROXY MODE
import requests

# Proxy mode: the autoparse=true option is appended to the proxy username
# ("scraperapi.autoparse=true") and the API key is sent as the proxy password.
proxy_url = "http://scraperapi.autoparse=true:APIKEY@proxy-server.scraperapi.com:8001"
# requests picks the proxy by the scheme of the URL being fetched, so the
# proxy must be registered for "https" as well or the https:// request below
# would not go through it.
proxies = {"http": proxy_url, "https": proxy_url}
# verify=False turns off TLS certificate verification for this request.
r = requests.get('https://www.amazon.com/dp/B07V1PHM66', proxies=proxies, verify=False)
print(r.text)

# Scrapy users can likewise simply pass their API key in the proxy URL via meta.
# NB: Scrapy skips SSL verification by default.
# ...other scrapy setup code
url = 'https://www.amazon.com/dp/B07V1PHM66'
start_urls = [url]
meta = {"proxy": proxy_url}


def parse(self, response):
    # ...your parsing logic here
    # Replace `url` with whichever page your parsing logic wants to follow next.
    # The proxy credentials travel in `meta`, so no extra headers are needed.
    yield scrapy.Request(url, callback=self.parse, meta=meta)
SDK METHOD
from scraperapi_sdk import ScraperAPIClient

# SDK usage: pass autoparse=True (a Python boolean, not the bare name `true`)
# to ask the API for parsed JSON output.
client = ScraperAPIClient('APIKEY')
result = client.get(url='https://www.amazon.com/dp/B07V1PHM66', autoparse=True).text
print(result)

# Scrapy users can simply replace the urls in their start_urls and parse function
# Note for Scrapy, you should not use DOWNLOAD_DELAY and
# RANDOMIZE_DOWNLOAD_DELAY, these will lower your concurrency and are not
# needed with our API
# ...other scrapy setup code
start_urls = [
    client.scrapyGet(url='https://www.amazon.com/dp/B07V1PHM66', autoparse=True)
]


def parse(self, response):
    # ...your parsing logic here
    yield scrapy.Request(
        client.scrapyGet(url='https://www.amazon.com/dp/B07V1PHM66', autoparse=True),
        self.parse,
    )