Scrapy 下载中间件（downloader middleware）配置:
class HandlessMiddleware(object):
    """Scrapy downloader middleware that loads pages in a Selenium Chrome browser.

    One Chrome instance is started when the middleware is created and reused
    for every request. ``process_request`` returns an ``HtmlResponse`` built
    from the rendered page, so Scrapy uses that response instead of running
    its own download for the request.
    """

    def __init__(self):
        """Start Chrome with a random user-agent and a 5 second implicit wait."""
        super(HandlessMiddleware, self).__init__()
        option = webdriver.ChromeOptions()
        # Headless mode is intentionally left off, so a visible Chrome window
        # opens. To run without a window: option.add_argument('headless')
        option.add_argument('user-agent=' + UserAgent().random)
        # `options=` replaces the `chrome_options=` keyword, which Selenium
        # deprecated and later removed.
        self.browser = webdriver.Chrome(options=option)
        # The implicit wait applies to the whole browser session, so it only
        # needs to be configured once rather than on every request.
        self.browser.implicitly_wait(5)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware and arrange for the browser to be closed.

        Args:
            crawler: The Scrapy crawler that is instantiating this middleware.

        Returns:
            A middleware instance whose ``spider_closed`` handler is connected
            to the crawler's ``spider_closed`` signal.
        """
        # Imported locally so this block does not depend on the file's
        # top-level import list.
        from scrapy import signals

        middleware = cls()
        crawler.signals.connect(
            middleware.spider_closed, signal=signals.spider_closed
        )
        return middleware

    def spider_closed(self, spider):
        """Quit the browser so no Chrome/chromedriver process is left behind.

        Args:
            spider: The spider that has just been closed (unused).
        """
        self.browser.quit()

    def process_request(self, request, spider):
        """Load ``request.url`` in the browser and return the rendered page.

        Args:
            request: The Scrapy request being processed.
            spider: The spider the request belongs to (unused).

        Returns:
            An ``HtmlResponse`` whose URL is the browser's current URL (which
            may differ from ``request.url`` after redirects) and whose body is
            the browser's page source, encoded as UTF-8.
        """
        self.browser.get(request.url)
        print("GET : " + request.url)
        return HtmlResponse(
            url=self.browser.current_url,
            body=self.browser.page_source,
            encoding="utf-8",
            request=request,
        )
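然后在 settings.py 的 DOWNLOADER_MIDDLEWARES 中启用该中间件即可，例如：DOWNLOADER_MIDDLEWARES = {'<项目名>.middlewares.HandlessMiddleware': 543}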
参考链接:
https://kuangshp1.gitbooks.io/scrapy-book/content/chapter09/10.html