# NOTE: USER_AGENT must be set in the project's settings to get past the Qiushibaike site's anti-crawler check.
# -*- coding: utf-8 -*-
import scrapy
from qsbk.items import QsbkItem
class ChongSpider(scrapy.Spider):
    """Spider that walks Qiushibaike listing pages and emits one item per <h2> text.

    Each listing page is scanned for ``<h2>`` text nodes (per the note at the
    end of this file, these are the author nicknames), and the last link in
    the ``pagination`` list is followed as the next page.
    """

    name = 'chong'
    allowed_domains = ['www.qiushibaike.com']
    start_urls = ['https://www.qiushibaike.com']

    def parse(self, response):
        """Yield a ``QsbkItem`` per ``<h2>`` text, then a request for the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            QsbkItem: One item per ``<h2>`` text node, with ``id`` set to the
                whitespace-stripped text and ``next_page`` set to the absolute
                URL of the next page (``None`` when no pagination link exists).
            scrapy.Request: A request for the next page, handled by this same
                callback, when a pagination link was found.
        """
        # Extract plain strings; storing Selector objects in items breaks
        # serialization in exporters and pipelines.
        nicknames = response.xpath('//h2/text()').extract()

        # The last pagination link is taken as the "next page" link. The list
        # is empty on pages without a pagination block, so guard the lookup
        # instead of raising IndexError and losing the page's items.
        page_links = response.xpath(
            '//ul[@class="pagination"]/li/a/@href'
        ).extract()
        next_page = response.urljoin(page_links[-1]) if page_links else None

        for nickname in nicknames:
            # Build a fresh item per row: re-yielding one mutated instance
            # would make every emitted item alias the same object.
            item = QsbkItem()
            item['next_page'] = next_page
            item['id'] = nickname.strip()
            yield item

        # Hand the next page back to this callback to keep crawling.
        if next_page is not None:
            yield scrapy.Request(next_page, callback=self.parse)
# Scrapy: crawling author nicknames from Qiushibaike (糗事百科)
# Latest recommended article published on 2020-07-07 23:40:13