# Reference: https://www.sohu.com/a/286177032_99987664
# Reference: https://blog.csdn.net/qq_38251616/article/details/81675871?depth_1-utm_source=distribute.pc_relevant.none-task&utm_source=distribute.pc_relevant.none-task
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 21 21:09:00 2020
@author: Administrator
"""
import time
import pandas as pd
import telnetlib
import requests
# Only the first two listing pages are scraped.
v_ip = []


def fast_proxy_addresses(table, max_seconds=1):
    """Return the proxy addresses in *table* that respond quickly enough.

    Args:
        table: DataFrame holding an ``IP`` column and a ``响应速度`` column
            whose values carry a ``秒`` (seconds) suffix, e.g. ``"0.5秒"``.
            A ``PORT`` column is used when present.
        max_seconds: Largest value of the ``响应速度`` column that is kept.

    Returns:
        A list of ``"ip:port"`` strings (bare ``"ip"`` strings when the
        table has no ``PORT`` column), in table order.
    """
    # Strip the "秒" suffix; anything that still is not a number becomes NaN
    # and fails the comparison below instead of raising.
    seconds = pd.to_numeric(
        table['响应速度'].astype(str).str.replace('秒', '', regex=False),
        errors='coerce',
    )
    fast = table[seconds <= max_seconds]
    addresses = fast['IP'].astype(str)
    if 'PORT' in fast.columns:
        # A bare IP is not a usable proxy endpoint; attach its port.
        addresses = addresses + ':' + fast['PORT'].astype(str)
    return addresses.to_list()


if __name__ == "__main__":
    for k in range(1, 3):
        url = 'https://www.kuaidaili.com/free/inha/{}/'.format(k)
        print(url)
        # The first table on the page is taken as the proxy listing.
        data = pd.read_html(url)[0]
        v_ip.extend(fast_proxy_addresses(data))
        # The site has strong anti-scraping measures, so pause between pages.
        time.sleep(2)
# Check which of the collected proxies are actually usable.
badnum = 0   # number of proxies that failed the check
goodnum = 0  # number of proxies that passed the check
good = []    # addresses of the proxies that passed


def proxy_settings(address):
    """Return the ``proxies`` mapping handed to ``requests`` for *address*.

    Args:
        address: Proxy endpoint as a string, e.g. ``"1.2.3.4:8080"``.

    Returns:
        A dict routing HTTPS traffic through ``https://<address>``.
    """
    # NOTE(review): the "https://" scheme makes requests talk TLS to the proxy
    # itself; plain HTTP proxies usually need "http://" here -- confirm.
    return {'https': "https://" + address}


def proxy_is_usable(address, timeout=2):
    """Return True when a request to Baidu through *address* completes.

    Args:
        address: Proxy endpoint as a string, e.g. ``"1.2.3.4:8080"``.
        timeout: Seconds to wait before giving up on the proxy.

    Returns:
        True if the request finished without a ``requests`` error,
        False otherwise.
    """
    try:
        # Without a timeout a dead proxy can block the whole run.
        requests.get('https://www.baidu.com/',
                     proxies=proxy_settings(address),
                     timeout=timeout)
    except requests.RequestException:
        return False
    return True


if __name__ == "__main__":
    # Check the first ten candidates, including the very first one.
    for proxy in v_ip[:10]:
        print('正在检测ip:', proxy)
        if proxy_is_usable(proxy):
            goodnum += 1
            good.append(proxy)
        else:
            badnum += 1
            # Failure marker printed by the original script.
            print(1)