1.这里是jobbole的spider中的代码
class JobboleSpider(scrapy.Spider):
    """Spider for python.jobbole.com that owns a Selenium-driven Chrome browser.

    One Chrome instance is started per spider instance and stored on
    ``self.brower``. The attribute and method spellings ("brower") are kept
    as-is because code outside this class may reference them. The browser is
    shut down when the ``spider_closed`` signal is dispatched.
    """

    name = "jobbole"
    allowed_domains = ["python.jobbole.com"]
    start_urls = ['http://python.jobbole.com/all-posts/']
    # Per-spider settings overrides.
    custom_settings = {
        "DOWNLOAD_DELAY": 2,
    }

    def __init__(self, *args, **kwargs):
        """Initialise the spider, start Chrome and register the shutdown hook.

        Args:
            *args: Positional arguments forwarded unchanged to ``scrapy.Spider``.
            **kwargs: Keyword arguments forwarded unchanged to
                ``scrapy.Spider`` (for example spider arguments supplied with
                ``scrapy crawl jobbole -a key=value``).
        """
        # Run the base initialiser first: if it raises, no Chrome process has
        # been started yet, so nothing is left running without a close hook.
        super(JobboleSpider, self).__init__(*args, **kwargs)
        # Start a Chrome browser through selenium. Original author's rationale:
        # keeping the webdriver on the spider gives better concurrency and lets
        # Chrome be closed when the spider finishes.
        self.brower = webdriver.Chrome(executable_path=r"C:\scrapy\resource\chromedriver.exe")
        # NOTE(review): `dispatcher` is imported outside this block; if it comes
        # from scrapy.xlib.pydispatch, confirm it exists in the Scrapy version
        # in use (crawler.signals.connect in from_crawler is the documented way).
        dispatcher.connect(self.close_brower, signals.spider_closed)

    def close_brower(self):
        """Quit the Chrome browser; registered as the ``spider_closed`` handler."""
        print("spider_closed close_brower")
        self.brower.quit()