1. 在 GitHub 上下载一个用于获取 IP 代理的代理池程序:
https://github.com/Python3WebSpider/ProxyPool
该仓库的页面上有使用教程。
使用示例:
# Module-level proxy slot; starts out empty (no proxy selected yet).
proxy = None
# Endpoint of the proxy pool service on localhost; get_proxy() requests this
# URL and treats the response body as a proxy address.
proxy_pool_url = 'http://localhost:5555/random'
def get_proxy():
    """Fetch one proxy address from the proxy pool service.

    Sends a GET request to ``proxy_pool_url`` and hands back the response
    body, which callers use as the proxy address.

    Returns:
        The response text when the service answers with HTTP 200, otherwise
        ``None`` (non-200 status, or the pool service could not be reached).
    """
    try:
        response = requests.get(proxy_pool_url)
    except requests.exceptions.ConnectionError:
        # requests raises its own ConnectionError, which is not a subclass of
        # the builtin ConnectionAbortedError, so the requests class must be
        # named here for an unreachable pool to yield None instead of a crash.
        return None
    if response.status_code == 200:
        return response.text
    return None
def get_index(count=1):
    """Request ``url`` through a proxy taken from the pool and return the body.

    A proxy is fetched with ``get_proxy()``; when one is available the request
    is routed through it, otherwise the request is sent directly. Redirects
    are not followed so that a 302 answer can be detected and handled here.

    Args:
        count: Attempt counter. It is incremented and passed on to
            ``get_page_index`` when the request fails with a connection error.

    Returns:
        The response text on HTTP 200. ``None`` when a 302 is received and no
        replacement proxy can be obtained, or for any other status code.
        Otherwise the result of the retry call to ``get_page_index``.
    """
    # NOTE(review): url, headers, offset, keyword and get_page_index are not
    # defined in this snippet; presumably they come from the surrounding
    # crawler module -- confirm before use.
    proxy = get_proxy()
    try:
        if proxy:
            proxies = {
                'http': 'http://' + proxy
            }
            print('正在使用IP:', proxy)
            response = requests.get(url, headers=headers, allow_redirects=False, proxies=proxies)
        else:
            response = requests.get(url, headers=headers, allow_redirects=False)
        if response.status_code == 200:
            return response.text
        if response.status_code == 302:
            print('302')
            # Only retry when the pool can still supply a proxy.
            proxy = get_proxy()
            if proxy:
                print('正在使用:', proxy)
                return get_page_index(offset, keyword)
            print('获取代理失败')
            return None
        return None
    except requests.exceptions.ConnectionError:
        # Name the requests exception explicitly: the builtin ConnectionError
        # does not cover connection failures raised by requests.
        return get_page_index(offset, keyword, count + 1)