# Following pagination links from inside a spider callback.
# NOTE: .extract_first() returns None when no node matches; guard before
# building the Request, since scrapy.Request(url=None) raises ValueError.
# (.get() is the modern alias for .extract_first().)
def parse(self, response):
    # Following links using XPath
    next_page = response.xpath('//a[@class="next-page"]/@href').extract_first()
    if next_page is not None:
        yield scrapy.Request(url=next_page, callback=self.parse)

    # Following links using CSS (equivalent selector, CSS syntax)
    next_page = response.css('a.next-page::attr(href)').extract_first()
    if next_page is not None:
        yield scrapy.Request(url=next_page, callback=self.parse)
Items
# Define items in items.py — each scrapy.Field() declares a key the
# spider may populate; Item behaves like a dict with a fixed key set.
class MyItem(scrapy.Item):
    field1 = scrapy.Field()
    field2 = scrapy.Field()


# Use in spider: fill the item from selectors and yield it so the
# engine routes it through the configured item pipelines.
def parse(self, response):
    item = MyItem()
    item['field1'] = response.css('...').extract_first()
    item['field2'] = response.css('...').extract_first()
    yield item
Middlewares
# Example downloader middleware (middlewares.py) that sets a custom
# User-Agent header on every outgoing request.
class CustomUserAgentMiddleware:
    def process_request(self, request, spider):
        # Returning None (implicitly) lets Scrapy continue processing
        # the request through the remaining middlewares.
        request.headers['User-Agent'] = 'Custom User Agent'
Setting User Agent
# Set the project-wide default User-Agent in settings.py.
# A per-request header or a middleware (see above) overrides this.
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
Exporting Data
# Export scraped data to CSV (format inferred from the -o file extension)
scrapy crawl spider_name -o output.csv

# Export scraped data to JSON
scrapy crawl spider_name -o output.json
Debugging
# Run a spider and export to JSON with log output suppressed.
# NOTE(review): --nolog silences logging — the opposite of debugging;
# for debugging use `scrapy crawl spider_name -L DEBUG` or `scrapy shell <url>`.
# -t sets the export format explicitly (redundant when -o ends in .json).
scrapy crawl spider_name -o output.json -t json --nolog