import random

from scrapy import signals


class Test001UseragentMiddleware(object):
    """Middleware hook that assigns a randomly chosen User-Agent to a request.

    Implements the ``process_request`` hook: every request passed through it
    gets its ``user-agent`` header overwritten with one entry of
    ``USER_AGENT``.
    """

    # Pool of User-Agent strings; one entry is picked at random per request.
    USER_AGENT = [
        # Chrome
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        # Firefox
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
        # Internet Explorer
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; GTB7.0)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
        # NOTE(review): the next entry has no closing parenthesis; it is kept
        # exactly as it was -- confirm whether that is intentional.
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0;rv:11.0) like Gecko",
        # Sogou browser
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
        # 360 browser
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;360SE)",
        # QQ browser
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;TencentTraveler 4.0)",
    ]

    def process_request(self, request, spider):
        """Overwrite the request's ``user-agent`` header with a random entry.

        Args:
            request: The outgoing request; its ``headers`` mapping is mutated.
            spider: The spider issuing the request (unused).

        Returns:
            None (implicitly).
        """
        user_agent = random.choice(self.USER_AGENT)
        request.headers["user-agent"] = user_agent


class CheckUA:
    """Middleware hook that flags requests whose response status is not 200."""

    def process_response(self, request, response, spider):
        """Mark the request as exempt from duplicate filtering on a non-200 status.

        Args:
            request: The request that produced ``response``; its
                ``dont_filter`` attribute is set to True on a non-200 status.
            response: The received response; returned unchanged.
            spider: The spider the response is for (unused).

        Returns:
            The ``response`` object that was passed in.
        """
        # Scrapy exposes ``response.status`` as an int. Comparing it with the
        # string '200' was always unequal, so every request used to be flagged.
        if response.status != 200:
            # Lets a re-sent request object enter the scheduler queue again.
            # NOTE(review): the request is only flagged here, not returned, so
            # this hook does not re-schedule it by itself -- confirm whether
            # returning ``request`` was intended.
            request.dont_filter = True
        # This return must not be omitted: the hook has to hand back an object.
        return response
class RandomProxy(object):
    """Middleware hook that routes each request through a randomly chosen proxy."""

    # Proxy endpoints in "//host:port" form; "http:" is prepended when one is
    # applied to a request.
    IP = ['//118.31.250.72:8080']

    def process_request(self, request, spider):
        """Store a randomly chosen proxy URL under ``request.meta['proxy']``.

        Args:
            request: The outgoing request; its ``meta`` mapping is mutated.
            spider: The spider issuing the request (unused).

        Returns:
            None (implicitly).
        """
        proxy = random.choice(self.IP)
        request.meta['proxy'] = "http:%s" % proxy