本次使用的代理IP是蘑菇代理,数据库是Redis。
1.settings配置
# Redis connection URL used by the proxy-pool helper and the middleware.
REDIS_URL = "redis://127.0.0.1:6379"

# Retry count; presumably consumed by Scrapy's built-in RetryMiddleware
# (nothing in the visible snippets reads it directly) -- confirm.
RETRY_TIMES = 2
2.写入工具类
import json
import time

import redis
import requests

from 项目名.settings import REDIS_URL


def get_ip(max_retries=None, retry_delay=5, timeout=10):
    """Fetch proxies from the provider API and push them onto the Redis pool.

    Each proxy is stored as an ``"ip:port"`` string in the Redis list
    ``"proxies"``.

    Args:
        max_retries: Maximum number of retries after a ``"3001"`` response.
            ``None`` (the default) retries indefinitely, matching the
            original behaviour.
        retry_delay: Seconds to sleep before retrying after ``"3001"``.
        timeout: Timeout in seconds for the HTTP call to the provider, so a
            stalled connection cannot block the caller forever.

    Returns:
        None. On code ``"0"`` the proxies are pushed to Redis; any response
        code other than ``"0"`` or ``"3001"`` is ignored, as before.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        ValueError: If the response body is not valid JSON.
        KeyError: If the payload lacks the expected keys.
    """
    conn = redis.from_url(REDIS_URL)
    url = "生成的API链接"

    retries = 0
    # Loop instead of recursing: the original called itself on every "3001"
    # response, which grows the stack without bound and opens a new Redis
    # connection per attempt.
    while True:
        body = requests.get(url, timeout=timeout)
        data = json.loads(body.text)
        # str() keeps the comparison working should the API return the code
        # as a number rather than a string.
        code = str(data["code"])

        if code == "0":
            # format() instead of "+" so a numeric port does not raise.
            proxies = ["{}:{}".format(msg["ip"], msg["port"]) for msg in data["msg"]]
            if proxies:
                # One LPUSH with all values yields the same list order as
                # pushing them one by one, in a single round trip.
                conn.lpush("proxies", *proxies)
            return

        if code != "3001":
            return

        # NOTE(review): "3001" looks like a "try again later" status from the
        # provider -- confirm against the provider's API documentation.
        retries += 1
        if max_retries is not None and retries > max_retries:
            return
        time.sleep(retry_delay)
3.配置Middlewares
import random  # needed by process_request; harmless if already imported


class ProxiesMiddleware(object):
    """Downloader middleware that routes requests through a Redis proxy pool.

    Proxies live in the Redis list ``"proxies"`` as ``"ip:port"`` entries.
    A proxy is removed from the pool when a response contains the
    verification marker or when the download raises, and the request is
    handed back for another attempt.

    The module must also provide ``redis``, ``REDIS_URL`` and ``get_ip``.
    """

    def __init__(self):
        self.coon = redis.from_url(REDIS_URL)

    def process_request(self, request, spider):
        """Assign a random proxy from the pool to ``request.meta["proxy"]``.

        Refills the pool through ``get_ip()`` when the list is empty.

        Raises:
            IndexError: If the pool is still empty after the refill attempt
                (unchanged from the original behaviour).
        """
        if self.coon.llen("proxies") == 0:
            get_ip()
        pool = [raw.decode("utf-8") for raw in self.coon.lrange("proxies", 0, -1)]
        request.meta["proxy"] = "http://{}".format(random.choice(pool))

    def process_response(self, response, request, spider):
        """Return the response, or a retry request if verification is demanded."""
        # getattr with a default: a non-text response may not expose .text.
        if "人机交互校验" in getattr(response, "text", ""):
            print("需要输入验证码,更换ip")
            print(response.url)
            return self._drop_proxy_and_retry(request)
        return response

    def process_exception(self, request, exception, spider):
        """Drop the proxy used for the failed download and retry the request."""
        print("删除失效IP")
        return self._drop_proxy_and_retry(request)

    def _drop_proxy_and_retry(self, request):
        """Remove the request's proxy from the pool and return a retry copy."""
        proxy_url = request.meta.get("proxy")
        if proxy_url:
            # redis-py 3+ signature is lrem(name, count, value); count=0
            # removes every occurrence of the entry.
            self.coon.lrem("proxies", 0, proxy_url.split("/")[-1])
        # dont_filter=True so the scheduler's duplicate filter does not drop
        # the re-queued request; process_request then picks a fresh proxy.
        return request.replace(dont_filter=True)