from scrapy.spiders import Spider
import scrapy
class SousuoSpider(Spider):
    """Spider that requests the so.com home page and prints its ``<input>`` tags.

    Requests are issued with redirects disabled, so a 302 response is passed
    to ``parse`` as-is instead of being followed to another page.
    """

    name = 'sousuo_zhuaqu'
    start_urls = ['https://www.so.com/']

    def start_requests(self):
        """Yield one ``scrapy.Request`` per URL in ``start_urls``.

        Each request carries two meta keys:

        * ``dont_redirect`` -- asks Scrapy's redirect middleware not to follow
          redirects for this request.
        * ``handle_httpstatus_list`` -- lets responses with status 302 reach
          the callback instead of being filtered out as HTTP errors.
        """
        for url in self.start_urls:
            yield scrapy.Request(
                url,
                callback=self.parse,
                meta={'dont_redirect': True, 'handle_httpstatus_list': [302]},
            )

    def parse(self, response):
        """Print every ``<input>`` element extracted from ``response``.

        The full list of extracted elements is printed once (only when it is
        non-empty), followed by each element on its own, whitespace-stripped
        and preceded by a separator line.
        """
        print("output***************************************")
        htmls = response.xpath("//input").extract()
        print("output***************************************")
        # The list does not change while iterating, so dump it a single time
        # rather than once per element.
        if htmls:
            print(htmls)
        for html in htmls:
            print("output***************************************")
            print(html.strip())
第一个问题:
现象:访问某地址时出现重定向(302),抓取到的内容与目标页面不同。
def start_requests(self):
for i in self.start_urls:
yield scrapy.Request(i , callback=self