settings.py
# Downloader middlewares. Scrapy calls process_request() in increasing order
# of these numbers, so the User-Agent is picked (100) before the proxy is
# attached (200).
DOWNLOADER_MIDDLEWARES = {
    'douban.middlewares.RandomUserAgent': 100,
    'douban.middlewares.RandomProxy': 200,
}

# Pool of User-Agent strings; RandomUserAgent picks one per request.
USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36',
]

# Pool of proxies; RandomProxy picks one per request.
#   ip_port     -- "host:port" of the proxy (read as proxy['ip_port']).
#   user_passwd -- "user:password" for proxy authentication.
# NOTE(review): credentials are committed in plain text here; consider loading
# them from the environment instead of keeping them in source control.
PROXIES = [
    {"ip_port": "121.42.140.113:16816", "user_passwd": "mr_mao_hacker:sffqry9r"},
]
middlewares.py
# Random User-Agent and random proxy downloader middlewares.
# NOTE: `random`, `base64`, USER_AGENTS and PROXIES are module-level names this
# code expects to be in scope; the two lists are defined in settings.py.


class RandomUserAgent(object):
    """Downloader middleware that gives each request a random User-Agent."""

    def process_request(self, request, spider):
        """Pick a random entry of USER_AGENTS for the request.

        The header is written with ``setdefault``, so a ``User-Agent`` that
        is already present on the request is left untouched.

        Args:
            request: Request being processed; its ``headers`` are updated.
            spider: Spider that issued the request (unused).

        Returns:
            None.
        """
        useragent = random.choice(USER_AGENTS)
        request.headers.setdefault("User-Agent", useragent)


class RandomProxy(object):
    """Downloader middleware that sends each request through a random proxy."""

    def process_request(self, request, spider):
        """Attach a random entry of PROXIES to the request.

        Each entry is a dict with an ``ip_port`` key ("host:port") and an
        optional ``user_passwd`` key ("user:password"). When ``user_passwd``
        is missing, ``None`` or empty, no authentication header is sent.

        Args:
            request: Request being processed; ``meta['proxy']`` is set and,
                for authenticated proxies, the ``Proxy-Authorization`` header.
            spider: Spider that issued the request (unused).

        Returns:
            None.

        Raises:
            KeyError: If the chosen proxy entry has no ``ip_port`` key.
        """
        proxy = random.choice(PROXIES)

        # The proxy URL is needed whether or not the proxy asks for credentials.
        request.meta['proxy'] = "http://" + proxy['ip_port']

        user_passwd = proxy.get('user_passwd')
        if not user_passwd:
            # Proxy without account verification: nothing more to add.
            return

        # b64encode works on bytes and returns bytes; convert on both sides so
        # the header value is plain text.
        if not isinstance(user_passwd, bytes):
            user_passwd = user_passwd.encode('utf-8')
        base64_userpasswd = base64.b64encode(user_passwd).decode('ascii')

        # 'Basic' is the canonical spelling of the authentication scheme.
        request.headers['Proxy-Authorization'] = 'Basic ' + base64_userpasswd