编写UserAgentMiddleware类
# Random request header
class UserAgentMiddleware(object):
    """Downloader middleware that gives every request a random User-Agent.

    On construction it builds a small pool of browser User-Agent strings;
    ``process_request`` then picks one of them at random for each request.
    """

    def __init__(self):
        # Pool of candidate User-Agent strings. Each entry must be a clean
        # header value: the second entry previously had the stray heading
        # text "Chrome 17.0 – MAC" fused onto its end, which has been removed.
        self.user_agent_list = [
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 "
            "(KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 "
            "(KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        ]

    def process_request(self, request, spider):
        """Overwrite the request's ``User-Agent`` header with a random pool entry.

        Args:
            request: Object exposing a mutable ``headers`` mapping; its
                ``'User-Agent'`` key is replaced unconditionally.
            spider: Not used by this method; accepted to match the call
                signature.

        Returns:
            None (implicitly).
        """
        # `random` is expected to be imported at module level.
        user_agent = random.choice(self.user_agent_list)
        request.headers['User-Agent'] = user_agent
在settings.py文件中设置相应的配置和属性,并在DOWNLOADER_MIDDLEWARES下配置中间件:
# Downloader middlewares to enable, mapped to their integer order values.
DOWNLOADER_MIDDLEWARES = {
    # Random proxy
    "boos_position.middlewares.BoosPositionDownloaderMiddleware": 543,
    # NOTE(review): this path uses the `example2` package while the other
    # entries use `boos_position` — confirm the module path is intended.
    "example2.middlewares.MyproxiesSpiderMiddleware": 125,
    # Random User-Agent header
    "boos_position.middlewares.UserAgentMiddleware": 300,
}