from scrapy import Request
from scrapy.pipelines.images import ImagesPipeline


class ZolPipeline(ImagesPipeline):
    num = 1

    def get_media_requests(self, item, info):
        image_url = item["image_urls"]
        if image_url:
            self.num += 1
            yield Request(url=image_url, meta={"item": item})

    def file_path(self, request, response=None, info=None):
        # start of deprecation warning block (can be removed in the future)
        def _warn():
            from scrapy.exceptions import ScrapyDeprecationWarning
            import warnings
            warnings.warn('ImagesPipeline.image_key(url) and file_key(url) methods are deprecated, '
                          'please use file_path(request, response=None, info=None) instead',
                          category=ScrapyDeprecationWarning, stacklevel=1)

        # check if called from image_key or file_key with url as first argument
        if not isinstance(request, Request):
            _warn()
            url = request
        else:
            url = request.url

        # detect if file_key() or image_key() methods have been overridden
        if not hasattr(self.file_key, '_base'):
            _warn()
            return self.file_key(url)
        elif not hasattr(self.image_key, '_base'):
            _warn()
            return self.image_key(url)
        # end of deprecation warning block

        return 'desk/{}.jpg'.format(request.meta["item"]["image_title"])
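Note that this `get_media_requests` treats `item["image_urls"]` as a single URL string rather than the list the stock `ImagesPipeline` expects, and `file_path` names each saved file after the item's `image_title`. A minimal sketch of the spider callback that would feed this pipeline — the XPath selectors and field values here are illustrative assumptions, not from the original project:

def parse_detail(self, response):
    # Hypothetical callback feeding ZolPipeline.
    yield {
        # a single URL string, matching how get_media_requests reads it
        "image_urls": response.xpath('//img[@id="bigImg"]/@src').get(),
        # used by file_path() to build 'desk/<title>.jpg'
        "image_title": response.xpath('//h1/text()').get(),
    }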
middlewares.py
from scrapy import signals

from zol2.useragents import agents


class Zol2SpiderMiddleware(object):
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the spider middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_spider_input(self, response, spider):
        # Called for each response that goes through the spider
        # middleware and into the spider.

        # Should return None or raise an exception.
        return None

    def process_spider_output(self, response, result, spider):
        # Called with the results returned from the Spider, after
        # it has processed the response.

        # Must return an iterable of Request, dict or Item objects.
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        # Called when a spider or process_spider_input() method
        # (from other spider middleware) raises an exception.

        # Should return either None or an iterable of Response, dict
        # or Item objects.
        pass

    def process_start_requests(self, start_requests, spider):
        # Called with the start requests of the spider, and works
        # similarly to the process_spider_output() method, except
        # that it doesn't have a response associated.

        # Must return only requests (not items).
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)
class Zol2DownloaderMiddleware(object):
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the downloader middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_request(self, request, spider):
        # Called for each request that goes through the downloader
        # middleware.

        # Must either:
        # - return None: continue processing this request
        # - or return a Response object
        # - or return a Request object
        # - or raise IgnoreRequest: process_exception() methods of
        #   installed downloader middleware will be called
        return None

    def process_response(self, request, response, spider):
        # Called with the response returned from the downloader.

        # Must either:
        # - return a Response object
        # - return a Request object
        # - or raise IgnoreRequest
        return response

    def process_exception(self, request, exception, spider):
        # Called when a download handler or a process_request()
        # (from other downloader middleware) raises an exception.

        # Must either:
        # - return None: continue processing this exception
        # - return a Response object: stops process_exception() chain
        # - return a Request object: stops process_exception() chain
        pass

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)
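The `agents` import at the top of middlewares.py is never used by the two template classes above, which suggests a random User-Agent middleware was intended. A minimal sketch, assuming `zol2/useragents.py` defines `agents` as a list of User-Agent strings (this class is an assumption, not part of the original file):

import random

from zol2.useragents import agents


class RandomUserAgentMiddleware(object):
    # Hypothetical downloader middleware: attach a random User-Agent
    # to every outgoing request, assuming `agents` is a list of UA strings.
    def process_request(self, request, spider):
        request.headers['User-Agent'] = random.choice(agents)
        return None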
settings.py
# -*- coding: utf-8 -*-

# Scrapy settings for zol2 project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html
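The body of the original settings.py is not reproduced here. A minimal sketch of the settings this project would plausibly need — the store path, priorities, and ROBOTSTXT_OBEY value are assumptions:

BOT_NAME = 'zol2'

SPIDER_MODULES = ['zol2.spiders']
NEWSPIDER_MODULE = 'zol2.spiders'

# Assumption: robots.txt checking disabled so image requests are not filtered.
ROBOTSTXT_OBEY = False

# Enable the image pipeline defined above; IMAGES_STORE is the directory
# that file_path() results ('desk/<title>.jpg') are resolved against.
ITEM_PIPELINES = {
    'zol2.pipelines.ZolPipeline': 300,
}
IMAGES_STORE = './images'  # hypothetical location

# Enable the hypothetical random User-Agent middleware sketched earlier.
DOWNLOADER_MIDDLEWARES = {
    'zol2.middlewares.RandomUserAgentMiddleware': 543,
}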