middlewares.py 代码:
import ast
import json

import requests
from fake_useragent import UserAgent
class UserAgentMiddlewares(object):
    """Downloader middleware that sets a User-Agent and cookies on each request.

    The User-Agent string is read from a ``fake_useragent.UserAgent`` instance;
    the attribute that is read (for example ``random``) is chosen by the
    ``USER_AGENT_TYPE`` setting. Cookies are fetched per request from a local
    HTTP service.
    """

    # Endpoints of the local helper services queried by this middleware.
    COOKIES_URL = 'http://localhost:5000/weibo/random'
    PROXY_URL = 'http://localhost:5010/get/'
    # Seconds to wait for a helper service. ``requests`` is synchronous, so
    # without a timeout a stalled service would block request processing
    # indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self, user_agent_type):
        """Store the User-Agent source and the attribute name to read from it.

        :param user_agent_type: name of the ``UserAgent`` attribute to use.
        """
        self.ua = UserAgent()
        self.user_agent_type = user_agent_type

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from the crawler's settings.

        ``USER_AGENT_TYPE`` falls back to ``'random'`` when it is not set.
        """
        return cls(
            user_agent_type=crawler.settings.get('USER_AGENT_TYPE', 'random')
        )

    def get_user_agent(self):
        """Return the User-Agent selected by ``self.user_agent_type``."""
        # Dynamic lookup: equivalent to ``self.ua.<user_agent_type>``.
        return getattr(self.ua, self.user_agent_type)

    def _fetch_text(self, url):
        """Return the body of a GET request to *url*.

        Raises a ``requests`` exception on timeout, connection failure or a
        non-2xx response instead of handing an error page to the caller.
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.text

    @staticmethod
    def _parse_cookies(raw):
        """Convert the cookie service's text payload into a dict or list.

        JSON is tried first; a Python-literal payload (for example a dict
        written with single quotes) is accepted as a fallback. Neither parser
        executes code.

        :raises ValueError: if the payload cannot be parsed, or does not
            describe a dict or a list.
        """
        text = raw.strip()
        try:
            cookies = json.loads(text)
        except ValueError:
            try:
                cookies = ast.literal_eval(text)
            except (ValueError, SyntaxError, TypeError):
                raise ValueError(
                    'cookie service returned an unparseable payload'
                )
        if not isinstance(cookies, (dict, list)):
            raise ValueError(
                'cookie service payload is not a dict or a list of cookies'
            )
        return cookies

    def get_cookies(self):
        """Return the raw cookie payload (text) from the cookie service."""
        return self._fetch_text(self.COOKIES_URL)

    def get_proxy(self):
        """Return the raw response text from the proxy service."""
        return self._fetch_text(self.PROXY_URL)

    def process_request(self, request, spider):
        """Attach a User-Agent header and cookies to *request*.

        Returns ``None`` so the request continues through the remaining
        downloader middlewares.
        """
        # ``setdefault`` leaves an existing header untouched. Scrapy's built-in
        # UserAgentMiddleware (order 500) sets this header first when this
        # middleware is registered after it, so the built-in one must be
        # disabled in DOWNLOADER_MIDDLEWARES for the value below to apply.
        request.headers.setdefault(b'User-Agent', self.get_user_agent())

        # Proxy support is currently switched off; to enable it:
        # request.meta['proxy'] = 'http://' + self.get_proxy()

        # SECURITY: this payload was previously passed to eval(), which would
        # execute arbitrary code returned by the service. It is now parsed
        # strictly as data.
        request.cookies = self._parse_cookies(self.get_cookies())
        return None
settings.py 代码:
# Downloader middlewares for the project; a value of None disables an entry.
DOWNLOADER_MIDDLEWARES = {
    'Weibo.middlewares.WeiboDownloaderMiddleware': None,
    # Scrapy's built-in User-Agent middleware runs at order 500, i.e. before
    # UserAgentMiddlewares (543), and fills in the User-Agent header first.
    # Because the custom middleware uses ``headers.setdefault``, its value
    # would then never be applied, so the built-in one is switched off.
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
    'Weibo.middlewares.UserAgentMiddlewares': 543,
}

# Name of the fake_useragent ``UserAgent`` attribute that
# UserAgentMiddlewares reads to obtain the User-Agent string.
USER_AGENT_TYPE = 'random'