# Middleware: attach a proxy to outgoing requests (中间件使用代理)
import requests
from fake_useragent import UserAgent
from scrapy import signals
from demo6 import settings
class Demo6SpiderMiddleware(object):
    """Middleware that assigns a fresh proxy from a remote pool to each request.

    NOTE(review): despite the "SpiderMiddleware" name, ``process_request`` is a
    *downloader*-middleware hook — confirm this class is registered under
    DOWNLOADER_MIDDLEWARES, not SPIDER_MIDDLEWARES.
    """

    def process_request(self, request, spider):
        """Fetch one ``ip:port`` from the XDAILI_URL pool and set it as the proxy.

        Returns ``None`` (implicitly) so Scrapy continues processing the
        request. On any network failure the error is logged and the request
        proceeds without a proxy — best-effort behavior, preserved from the
        original implementation.
        """
        try:
            # Proxy-pool endpoint, configured per-spider via settings.
            xdaili_url = spider.settings.get('XDAILI_URL')
            # Fix: a timeout is required — without it a dead/stalled proxy
            # service blocks the downloader thread indefinitely.
            r = requests.get(xdaili_url, timeout=5)
            # Fix: pools typically return "ip:port\n"; strip whitespace so we
            # don't build a malformed proxy URL.
            proxy_ip_port = r.text.strip()
            request.meta['proxy'] = 'http://' + proxy_ip_port
            # Fix: log the proxy actually assigned (the original printed the
            # pool URL instead), and use the spider logger rather than print.
            spider.logger.debug('using proxy http://%s', proxy_ip_port)
        except requests.exceptions.RequestException:
            spider.logger.error('获取讯代理ip失败!')