自定义两层路径时,item 需要在请求之间传值;爬虫代码如下:
import scrapy
from urllib.request import urljoin
from ..items import OffmymindspiderItem
class OffmymindSpider(scrapy.Spider):
name = 'offmymind'
allowed_domains = ['www.biaobaiju.com']
start_urls = ['http://www.biaobaiju.com/']
def parse(self, response):
    """
    Collect the URL and name of every category link in the navigation bar.

    One request is scheduled per ``<a>`` found under
    ``ul.nav.clearfix > li``; the link text is forwarded to the callback
    through ``meta["img_type_name"]``.

    :param response: response for the page being parsed; must expose
        ``xpath`` and ``urljoin``.
    :return: generator of ``scrapy.Request`` objects whose callback is
        ``self.parse_img_type_info``.
    """
    for link in response.xpath("//ul[@class='nav clearfix']/li/a"):
        href = link.xpath("@href").extract_first("").strip()
        if not href:
            # An anchor without an href cannot be followed; building a
            # Request from "" raises ValueError and would abort the whole
            # generator, losing every remaining category.
            continue
        img_type_name = link.xpath("text()").extract_first("")
        yield scrapy.Request(
            # Resolve relative hrefs against the page URL; absolute
            # hrefs pass through unchanged.
            url=response.urljoin(href),
            dont_filter=False,
            callback=self.parse_img_type_info,
            meta={"img_type_name": img_type_name},
        )
def parse_img_