# Measure the proxy IP repetition rate and test proxies (统计代理重复率,测试代理).
import re
import requests
def proxy_test(proxy):
    """Fetch http://cip.cc through ``proxy`` and extract the IP it reports.

    The request is attempted up to five times; the first attempt that
    yields any HTTP response (regardless of status code) ends the retries.

    Args:
        proxy: Proxy URL, used for both the ``http`` and ``https`` schemes.

    Returns:
        A ``(response, ip)`` tuple. ``ip`` is the text captured between
        ``IP`` and ``地址`` in the response body, with colons and surrounding
        whitespace removed, or ``None`` when the body contains no such text
        (for example an error page returned by the proxy).

    Raises:
        requests.RequestException: The error from the final attempt, when
            every attempt fails.
    """
    # Only the curl User-Agent is ever sent (the original overwrote its
    # browser UA before the request). Presumably this makes the service
    # answer in plain text -- TODO confirm.
    headers = {"User-Agent": "curl/7.60.0"}
    proxies = {
        "http": proxy,
        "https": proxy,
    }
    target_url = "http://cip.cc"

    response = None
    last_error = None
    for _ in range(5):
        try:
            response = requests.get(
                target_url,
                proxies=proxies,
                timeout=10,
                headers=headers,
                allow_redirects=False,
                verify=False,
            )
        except requests.RequestException as exc:
            # Remember the failure so it can be surfaced if no attempt succeeds.
            last_error = exc
        else:
            break

    if response is None:
        # Every attempt failed: raise the real network error instead of
        # falling through to an UnboundLocalError on ``response``.
        raise last_error

    # A body without the expected text must not crash the caller, which
    # still needs the response object to inspect its status code.
    match = re.search(r'IP(.+?)\s地址', response.text)
    ip = match.group(1).replace(':', '').strip() if match else None
    return response, ip
def statistics():
    """Sample ten proxies and print status-code and IP-duplication counts.

    For each of ten iterations a proxy is obtained from ``get_proxy()``
    (not defined in the visible part of this file -- presumably provided
    elsewhere) and passed to ``proxy_test``. Non-200 status codes and the
    IPs from 200 responses are collected separately, then three lines are
    printed: the non-200 count with the codes, the number of collected IPs,
    and the number of distinct IPs among them.
    """
    ok_ips = []
    failed_codes = []
    for _ in range(10):
        candidate = get_proxy()
        response, ip = proxy_test(candidate)
        if response.status_code == 200:
            ok_ips.append(ip)
        else:
            failed_codes.append(response.status_code)

    print('非200:', len(failed_codes), failed_codes)
    print('总ip数:', len(ok_ips))
    # Distinct IP count; compare with the total above to gauge repetition.
    print(len(set(ok_ips)))
# Script entry point: run the sampling only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    statistics()