直接代码
import scrapy
from imgProject.items import ImgprojectItem
class ImgfirstSpider(scrapy.Spider):
    """Spider that collects image URLs from the sc.chinaz.com picture listing."""

    name = 'imgfirst'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['https://sc.chinaz.com/tupian/']

    def parse(self, response):
        """Yield one ``ImgprojectItem`` per image found on the listing page.

        Args:
            response: The downloaded listing page.

        Yields:
            ImgprojectItem: An item whose ``src`` field holds the absolute
            URL of one image.
        """
        for div in response.xpath('//*[@id="container"]/div'):
            # The URL is read from ``src2`` rather than ``src``; presumably the
            # page lazy-loads images and keeps the real URL there -- verify
            # against the live markup.
            raw_src = div.xpath('./div/a/img/@src2').extract_first()
            if not raw_src:
                # Not every child div necessarily wraps an image; without this
                # guard, concatenating with None would raise TypeError.
                continue
            # urljoin resolves scheme-relative ("//host/path") values against
            # the page URL and leaves already-absolute URLs untouched.
            src = response.urljoin(raw_src)
            print(src)
            item = ImgprojectItem()
            item['src'] = src
            yield item
class ImgprojectItem(scrapy.Item):
    """Item carrying the URL of a single scraped image."""

    # URL of the image; assigned by the spider and read by the image pipeline.
    src = scrapy.Field()
# Scrapy project settings used by the image-download example.

# Do not consult robots.txt before crawling.
ROBOTSTXT_OBEY = False
# Only show error-level log output.
LOG_LEVEL = 'ERROR'
# The original listing had a bare placeholder here ("copy it from a request"),
# which raises NameError when the settings module is loaded. This is an
# example value; replace it with the User-Agent header copied from your own
# browser's request if needed.
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
)
ITEM_PIPELINES = {
    # 'imgProject.pipelines.ImgprojectPipeline': 300,
    'imgProject.pipelines.imgsPipeLine': 300,
}
# Directory under which the images pipeline stores downloaded files.
IMAGES_STORE = './imgs'
import scrapy
from scrapy.pipelines.images import ImagesPipeline
# class ImgprojectPipeline:
# def process_item(self, item, spider):
# return item
class imgsPipeLine(ImagesPipeline):
    """Image pipeline that downloads ``item['src']`` and names the file after the URL.

    NOTE: Scrapy's ImagesPipeline requires the Pillow package. If Pillow is
    not installed, no images are downloaded even though the URLs are valid.
    """

    def get_media_requests(self, item, info):
        """Yield the download request for the item's image URL.

        Args:
            item: The scraped item; its ``src`` field holds the image URL.
            info: Pipeline bookkeeping object supplied by Scrapy (unused).
        """
        print('get_media_request')
        yield scrapy.Request(item['src'])

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the file name to store the downloaded image under.

        The name is the last ``/``-separated segment of the request URL.

        Args:
            request: The image download request.
            response: The download response, if available (unused).
            info: Pipeline bookkeeping object supplied by Scrapy (unused).
            item: The item being processed. Scrapy 2.4+ passes this as a
                keyword argument; accepting it keeps the override compatible
                with current Scrapy while older callers can still omit it.

        Returns:
            str: The file name derived from the URL.
        """
        imgName = request.url.split('/')[-1]
        print('file_path')
        return imgName

    def item_completed(self, results, item, info):
        """Pass the item on to the next pipeline once its downloads finish.

        Args:
            results: Download outcomes for the item's requests (not inspected).
            item: The item whose media requests have completed.
            info: Pipeline bookkeeping object supplied by Scrapy (unused).

        Returns:
            The unchanged item.
        """
        print('item_completed')
        return item
小结:URL 没有问题,但就是下载不了图片。
代码本身没有问题。看了网上很多解答,终于看到大佬的解答,瞬间觉得蛋疼:
原因就是没有安装标题里提到的那个库。