直接上代码
中间件代码
SeleniumMiddleware 中间件类
from scrapy import signals
from selenium import webdriver
from scrapy.http.response.html import HtmlResponse
import time
class SeleniumMiddleware(object):
    """Scrapy downloader middleware that fetches pages with Selenium.

    Each request is loaded in a Chrome browser driven by Selenium, and the
    resulting page source is handed back to Scrapy as an ``HtmlResponse``.
    One browser instance is created per middleware instance and is shut
    down when the spider closes.
    """

    def __init__(self):
        # NOTE(review): the chromedriver path is machine-specific; adjust it
        # for the host this crawler runs on.
        self.driver = webdriver.Chrome(executable_path=r'D:\selenium\chromedriver_win32 (1)\chromedriver.exe')

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware and register browser cleanup.

        Scrapy calls this hook (when present) instead of the bare
        constructor. It is used here to connect ``spider_closed`` so the
        browser process does not outlive the crawl.
        """
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_closed, signal=signals.spider_closed
        )
        return middleware

    def spider_closed(self, spider):
        """Quit the Selenium driver once the spider has finished."""
        self.driver.quit()

    def process_request(self, request, spider):
        """Load ``request.url`` in the browser and return its page source.

        Returns an ``HtmlResponse`` built from the browser's current URL and
        page source. Returning a response from ``process_request`` makes
        Scrapy use it instead of downloading the page itself.
        """
        # Open the requested page with Selenium.
        self.driver.get(request.url)
        # Grab the page source from the browser.
        source = self.driver.page_source
        # Wrap the source in a response object bound to the original request.
        response = HtmlResponse(
            url=self.driver.current_url,
            body=source,
            request=request,
            encoding='utf-8',
        )
        return response
SeleniumMiddleware
import scrapy
from jd_spider.items import JdS