使用 Scrapy 编写爬虫时，如果一个 item 的属性分布在不同层级的页面上，可以在当前回调中执行 yield scrapy.Request(url=videolink, meta={'item':item}, callback=self.parseReal2)，
通过 meta 把尚未填完的 item 传给下一级回调；下一级回调用 response.meta['item'] 取出该 item，补全其余属性后再 yield，这样就能把不在同一级页面的属性汇总到最终的 item 中。
def parseReal(self, response):
    """Parse the JSON video listing and schedule one request per video.

    The response body is decoded as JSON and the sequence stored under its
    ``data`` key is iterated. For each entry a ``YoukuItem`` is pre-filled
    with the fields available at this level (``videoid``, ``url``,
    ``videoname``) and a request for the video page is yielded with that
    item attached as ``meta['item']``. Scrapy exposes the same dict to the
    callback as ``response.meta``, which lets ``parseReal2`` pick the item up.

    Args:
        response: Response whose body is a JSON document containing a
            ``data`` key. Each entry is read for the keys ``videoLink``,
            ``videoId`` and ``title``.

    Yields:
        scrapy.Request: One request per entry, targeting the entry's video
        link with ``self.parseReal2`` as the callback.

    Raises:
        KeyError: If ``data`` or one of the per-entry keys is missing.
    """
    payload = json.loads(response.body)
    entries = payload['data']
    # An empty or null "data" value means there is nothing to schedule.
    if not entries:
        return

    for entry in entries:
        item = YoukuItem()
        # videoLink is concatenated after an explicit scheme, so it is
        # presumably protocol-relative ("//host/path") — TODO confirm.
        videolink = "https:" + entry['videoLink']
        # Video id
        item['videoid'] = entry['videoId']
        # Video URL
        item['url'] = videolink
        # Video title
        # NOTE(review): .encode() produces bytes on Python 3 — confirm that
        # the item pipeline expects bytes rather than str here.
        item['videoname'] = entry['title'].encode('utf-8')
        # Hand the partially filled item to the next-level callback.
        yield scrapy.Request(url=videolink, meta={'item':item}, callback=self.parseReal2)
def parseReal2(self, response):