方法1:
首先,可以从西刺等代理网站获取代理 IP,并存储到数据库中。
然后,在 spider 的 middlewares.py 中添加代理 IP,示例代码如下:
# base64 is imported only because it is needed when the proxy we are
# going to use requires authentication.
#-*- coding:utf-8-*-
import base64
from proxy import GetIp,counter
import logging
ips=GetIp().get_ips() ##########################################
class ProxyMiddleware(object):
http_n=0 #counter for http requests
https_n=0 #counter for https requests
# overwrite process request
def process_request(self, request, spider):
# Set the location of the proxy
if request.url.startswith("http://"):
n=ProxyMiddleware.http_n
n=n if n<len(ips['http']) else 0
request.meta['proxy']= "http://%s:%d"%(
ips['http'][n][0],int(ips['http'][n][1]))
logging.info(