import requests
from lxml import etree
# Scrape the proxy table from the listing page and probe every listed
# proxy with a plain HTTP request, collecting the ones that answer 200.

# Page that publishes the proxy table.
url = 'http://www.xicidaili.com/'
# Page requested through each proxy to decide whether the proxy works.
test_url = 'http://www.baidu.com'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

# A timeout keeps the script from hanging forever if the listing site stalls.
response = requests.get(url, headers=headers, timeout=10)
html_ele = etree.HTML(response.text)
tr_list = html_ele.xpath('//table[@id="ip_list"]/tr')

# Both accumulators live outside the loop so the results gather across all
# rows and the progress counter actually advances.
normal_proxies = []
count = 0

for tr_ele in tr_list:
    # The second and third cells are combined below as "<ip>:<port>".
    ip_cells = tr_ele.xpath('./td[2]/text()')
    port_cells = tr_ele.xpath('./td[3]/text()')

    # Rows missing either cell text (e.g. header rows) carry no proxy.
    if not ip_cells or not port_cells:
        continue

    proxies = ip_cells[0].strip() + ':' + port_cells[0].strip()

    count += 1
    print("第%s个。。" % count)

    try:
        # requests expects proxy URLs to carry an explicit scheme; the bare
        # "ip:port" form is kept for reporting and for the result list.
        check_response = requests.get(
            test_url,
            headers=headers,
            proxies={"http": "http://" + proxies},
            timeout=1,
        )
    except Exception:
        # Best-effort scan: any failure while probing one proxy (timeout,
        # refused connection, malformed address, ...) only disqualifies
        # that proxy and must not abort the remaining checks.
        print("该代理IP无效:", proxies)
        continue

    if check_response.status_code == 200:
        print("该代理IP可用:", proxies)
        normal_proxies.append(proxies)
    else:
        print("该代理IP不可用:", proxies)