class DownMiddleware1(object):
    """Example Scrapy downloader middleware.

    Scrapy calls the three hooks below at different points of the
    request/response cycle; the return value of each hook tells the
    engine how to proceed.
    """

    def process_request(self, request, spider):
        """Called for every request that passes through the downloader middleware.

        :param request: the request being processed
        :param spider: the spider that issued the request
        :return:
            None -- continue with the remaining middlewares and download;
            Response object -- stop process_request chain, start process_response;
            Request object -- stop the middleware chain, reschedule the Request;
            raise IgnoreRequest -- stop process_request, start process_exception.
        """
        # No-op: let the request continue down the middleware chain.
        return None

    def process_response(self, request, response, spider):
        """Called with the downloaded response on its way back to the spider.

        :param request: the request that produced this response
        :param response: the downloaded response
        :param spider: the spider the response is destined for
        :return:
            Response object -- passed on to the other middlewares' process_response;
            Request object -- stop the middleware chain, reschedule the request;
            raise IgnoreRequest -- Request.errback is called.
        """
        print('response1')
        return response

    def process_exception(self, request, exception, spider):
        """Called when a download handler or process_request() raises an exception.

        :param request: the request that caused the exception
        :param exception: the raised exception
        :param spider: the spider for the request
        :return:
            None -- continue passing the exception to later middlewares;
            Response object -- stop the process_exception chain;
            Request object -- stop the middleware chain, reschedule the request.
        """
        return None
# settings.py configuration: register the middleware with the engine.
# Lower numbers run closer to the engine on requests.
DOWNLOADER_MIDDLEWARES = {
    # The project's default middleware stays disabled:
    # 'sp3.middlewares.Sp3DownloaderMiddleware': 543,
    'sp3.middlewares.DownMiddleware1': 541,
}