一、管道存储操作(pipelines.py)
import hashlib
import posixpath
from urllib.parse import urlsplit

import scrapy
from scrapy.pipelines.images import ImagesPipeline
class XhxhPipeline(object):
    """Default item pipeline: hands every item on without modification."""

    def process_item(self, item, spider):
        """Return ``item`` unchanged.

        Args:
            item: The scraped item passed to this pipeline.
            spider: The spider that produced the item (unused here).

        Returns:
            The very same ``item`` object that was passed in.
        """
        return item
class ImgPipeLine(ImagesPipeline):
    """Image pipeline that names each stored file after its URL's last segment."""

    def get_media_requests(self, item, info):
        """Yield the download request for the item's image.

        The image URL is read from ``item['img_link']``; ``info`` is unused.
        """
        yield scrapy.Request(url=item['img_link'])

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the file name under which the downloaded image is stored.

        The name is the last segment of the URL *path*, so a query string or
        fragment (``a.jpg?size=big``) never ends up in the file name. Note
        that two URLs sharing the same last path segment map to the same
        file name.

        Args:
            request: The request whose ``url`` determines the file name.
            response: Unused; accepted for signature compatibility.
            info: Unused; accepted for signature compatibility.
            item: Unused; keyword-only argument accepted so that callers
                which pass the item along do not fail.

        Returns:
            The file name as a string; never empty.
        """
        url = request.url
        # Split on the URL path only, so "?query" and "#fragment" are dropped.
        file_name = posixpath.basename(urlsplit(url).path)
        if not file_name:
            # URL has no usable last segment (e.g. it ends with "/"): an empty
            # name would not identify a file, so derive a stable one from the
            # URL itself.
            file_name = hashlib.sha1(url.encode('utf-8')).hexdigest() + '.jpg'
        return file_name

    def item_completed(self, results, item, info):
        """Return ``item`` unchanged once its media requests have finished.

        ``results`` and ``info`` are not inspected.
        """
        return item
二、在配置文件(settings.py)中注册 ImagesPipeline 管道
# Pipelines to enable, keyed by import path; the integer is the value
# assigned to each pipeline.
ITEM_PIPELINES = {
    'xhxh.pipelines.XhxhPipeline': 300,  # default storage pipeline
    'xhxh.pipelines.ImgPipeLine': 301,  # newly configured image pipeline
}

# Folder the images are stored in: used as-is if it already exists,
# otherwise the "mvs" folder is created.
IMAGES_STORE = './mvs'