在middlewares中添加以下代码,就可以实现通过代理IP来获取网站数据 # middlewares.py from twisted.internet.threads import deferToThread import requests class ChangeProxy(object): def __init__(self): # 代理IP列表 self.proxy_list = [ {"ip": "httpIP", "port": "8081", "username": "username", "password": "password"}, {"ip": "httpIP", "port": "80"}, {"ip": "httpIP", "port": "8080"}, {"ip": "httpIP", "port": "8080"}, {"ip": "httpIP", "port": "3128"} # 添加其他代理IP信息 ] def process_request(self, request, spider): deferred = deferToThread(self._send_request, request) deferred.addCallback(self._handle_response) return deferred def _send_request(self, request): # 发送请求的逻辑 # 你可以根据实际
Python之Scrapy框架循环使用HTTP代理获取网站数据
最新推荐文章于 2024-07-22 22:58:19 发布