废话不多说，上代码。
1、以下代码为 spider 文件：
import scrapy from car_home.items import che168Item class Che168Spider(scrapy.Spider): name = 'che168' allowed_domains = ['che168.com'] start_urls = ['https://www.che168.com/beijing/list/'] def parse(self, response): #获取多个列表页链接,爬取多页数据 max_page = response.xpath('//div[@id="listpagination"]/a/text()')[-1].extract() base_url = 'https://www.che168.com/beijing/a0_0msdgscncgpi1ltocsp{}exx0/' for i in range(1,int(max_page)+1): url = base_url.format(i) yield scrapy.Request(url,callback=