获取IP:
# coding:utf-8
import requests
import json


def get_ip():
    """Fetch one proxy from the extraction API and return it as a proxy URL.

    Returns:
        str: proxy in the form "http://<ip>:<port>".

    Raises:
        requests.RequestException: on network failure.
        KeyError / IndexError / ValueError: if the API response is malformed.
    """
    url = 'XXXXXX'  # proxy extraction API endpoint (placeholder)
    # Fetch exactly once and reuse the response: the original called
    # requests.get(url) twice (once for the print, once for parsing),
    # which consumed two proxies from the extraction quota per call.
    resp = requests.get(url)
    print(resp.text)  # debug: raw API payload
    data = json.loads(resp.text)["data"][0]
    return "http://{host}:{port}".format(host=data["ip"], port=data["port"])
在middlewares.py文件中添加下载中间件（settings中注册的路径是 项目名.middlewares.…，并非pipelines.py）:
from scrapy import signals
from .utils import get_ip


class MyproxiesSpiderMiddleware(object):
    """Downloader middleware that routes every outgoing request through a proxy.

    NOTE(review): the proxy is fetched once at instantiation and reused for
    the whole crawl; if the provider's proxies expire, move the get_ip()
    call into process_request.
    """

    def __init__(self, ip=''):
        # Honor an explicitly supplied proxy; '' is falsy, so the default
        # still fetches one from the API as before. (Previously the `ip`
        # argument was accepted but silently ignored.)
        self.ip = ip or get_ip()

    def process_request(self, request, spider):
        """Attach the proxy to the request before it is downloaded."""
        print("this is ip:{}".format(self.ip))
        request.meta["proxy"] = self.ip
settings.py文件配置:
# IP开关 DOWNLOADER_MIDDLEWARES = { 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 127, '项目名.middlewares.MyproxiesSpiderMiddleware': 126, }