spider中的代码:
def parse(self, response):
    """Yield an ``ImgsproItem`` for images in the first three list entries.

    Selects the ``<li>`` nodes matched by the ``#main`` XPath, reads the
    ``src`` attribute of the ``<img>`` inside each entry's link, and yields
    one item per image URL found.

    Args:
        response: The response for the listing page; it must provide
            ``xpath()`` and ``urljoin()``.

    Yields:
        ImgsproItem: An item whose ``src`` field holds the image URL, made
        absolute against the response URL.
    """
    max_entries = 3  # only the first three list entries are examined
    li_list = response.xpath('//*[@id="main"]/div/div[1]/ul/li')
    for li in li_list[:max_entries]:
        src = li.xpath('./a/img/@src').extract_first()
        if not src:
            # No <img src> in this entry. An item without a URL cannot be
            # turned into a download request, so skip it instead of
            # yielding src=None.
            continue
        # Resolve relative / protocol-relative paths: a request URL without
        # a scheme is rejected when the image is later requested.
        src = response.urljoin(src)
        print(src)
        item = ImgsproItem()
        item['src'] = src
        yield item
管道类中的代码:
from scrapy.pipelines.images import ImagesPipeline
import scrapy
class imgsPipeline(ImagesPipeline):
print('kkkkk')
def get_media_requests(self, item, info):
    """Request the image data for the URL stored in the item.

    Args:
        item: The scraped item; its ``'src'`` value is used as the request URL.
        info: Not used by this method.

    Yields:
        scrapy.Request: A single request for ``item['src']``.
    """
    image_url = item['src']
    yield scrapy.Request(image_url)
# 指定图片储存的路径
def file_path(self, request, response=Non