scrapy中设置随机User-Agent
使用fake_useragent来随机伪装User-Agent,安装:pip install fake_useragent
首先在middleware.py中写一个随机更换user-agent的class
from fake_useragent import UserAgent


class RandomUserAgentMiddleware(object):
    """Downloader middleware that attaches a random User-Agent to each request."""

    def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()
        # fake_useragent supplies a pool of real-world browser UA strings.
        self.ua = UserAgent()
        #self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")

    @classmethod
    def from_crawler(cls, crawler):
        # Scrapy entry point: receives the crawler and builds the middleware.
        return cls(crawler)

    def process_request(self, request, spider):
        # Only fill the header in when the request does not already carry one.
        ua_string = self.ua.random
        request.headers.setdefault('User-Agent', ua_string)
ua = UserAgent()
ua.ie
ua.chrome
ua.random  # 会随机生成一个浏览器的user-agent
为了让user-agent可配置,可在setting文件中增加一行配置:
RANDOM_UA_TYPE = "random"
class RandomUserAgentMiddleware(object):
    """Downloader middleware that sets a User-Agent of a configurable flavour.

    The flavour comes from the RANDOM_UA_TYPE setting and names an attribute
    of fake_useragent.UserAgent (e.g. "ie", "chrome", "random").
    """

    def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()
        self.ua = UserAgent()
        # Which UA family to draw from; falls back to a fully random one.
        self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")

    @classmethod
    def from_crawler(cls, crawler):
        # Scrapy entry point: receives the crawler and builds the middleware.
        return cls(crawler)

    def process_request(self, request, spider):
        # getattr lets the setting pick ua.ie / ua.chrome / ua.random, etc.;
        # setdefault keeps any User-Agent the request already carries.
        request.headers.setdefault('User-Agent', getattr(self.ua, self.ua_type))