Max retries exceeded with url
寻找可用的ip
import time
from lxml import html # lxml is a library for parsing XML/HTML documents
etree = html.etree
import requests
proxy = [] # module-level list; askurl() appends the scraped proxy dicts to it
def askurl(url, target_url="http://httpbin.org/ip", timeout=5):
    """Scrape a proxy listing page and try every proxy against a target URL.

    The listing page is expected to expose the addresses in table cells
    marked ``data-title='IP'`` and ``data-title='PORT'``. Every ip/port
    pair found is appended to the module-level ``proxy`` list as a
    ``{"http": "http://ip:port"}`` mapping, and then each entry of
    ``proxy`` is used for one GET request to ``target_url``.

    Args:
        url: Address of the proxy listing page to scrape.
        target_url: Address requested through each proxy. The original
            script left this value blank; the default is only a stand-in
            and should be replaced by the site you actually need. It must
            be an ``http://`` URL, because the proxy mappings built here
            only contain an ``"http"`` entry and requests selects the
            proxy by URL scheme.
        timeout: Seconds to wait for each request before giving up, so a
            dead proxy cannot block the loop forever.

    Returns:
        The proxy mappings whose request completed without raising a
        ``requests`` exception.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched. Failures of individual proxies are printed and
            skipped instead.
    """
    head = {"User-Agent": "Mozilla/5.0"}

    # Short pause before hitting the listing page. This used to be passed
    # as the second positional argument of requests.get (the ``params``
    # slot), which only worked because time.sleep() returns None.
    time.sleep(1)
    r = requests.get(url, headers=head, timeout=timeout)
    tree = etree.HTML(r.text)

    # Drop whitespace-only text nodes before pairing, otherwise stray
    # newlines inside the cells would shift IPs against ports in zip().
    raw_ips = tree.xpath("//td[@data-title='IP']//text()")
    raw_ports = tree.xpath("//td[@data-title='PORT']//text()")
    ips = [text.strip() for text in raw_ips if text.strip()]
    ports = [text.strip() for text in raw_ports if text.strip()]

    for ip, port in zip(ips, ports):
        proxy.append({"http": "http://" + ip + ":" + port})

    working = []
    for pro in proxy:
        try:
            res = requests.get(
                target_url, headers=head, proxies=pro, timeout=timeout
            )
        except requests.RequestException as e:
            # An unreachable or refusing proxy is expected; report it and
            # move on to the next candidate.
            print(pro)
            print(e)
        else:
            print(res)
            print(pro)
            working.append(pro)
    return working
if __name__ == '__main__':
    # Script entry point: scrape the free-proxy listing page and try the
    # proxies found on it.
    listing_url = 'https://www.kuaidaili.com/free/inha'
    askurl(listing_url)
参考: