import random
import requests
from lxml import etree
import json
# Pool of desktop-browser User-Agent strings; one is picked at random for
# each outgoing request.
user_agent_list = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
]
# Build request headers with a randomly chosen User-Agent.
def get_random_header():
    '''
    Assemble the HTTP headers sent with every request.

    The User-Agent is drawn at random from ``user_agent_list`` on each
    call; the Accept and Accept-Encoding values are fixed.

    :return: dict mapping header names to header values
    '''
    chosen_agent = random.choice(user_agent_list)
    return {
        "User-Agent": chosen_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip",
    }
# Scrape a proxy listing page and save the proxies that pass test_ip.
def high_concealment_agent(url="https://www.xicidaili.com/nn"):
    '''
    Fetch a proxy listing page, test every listed proxy and save the good ones.

    Rows are taken from ``<table id="ip_list">``; within a row the 2nd cell
    is read as the IP, the 3rd as the port and the 6th as the protocol.
    Every row is passed to ``test_ip`` and those for which it returns 200
    are written to ``ip.txt`` (truncated on every call) as
    ``scheme://ip:port``, one per line.

    :param url: string, address of the listing page to scrape
    :return: None
    '''
    # A timeout keeps an unresponsive listing site from hanging the script.
    response = requests.get(url, headers=get_random_header(), timeout=10)
    page = etree.HTML(response.text)
    # Print the text of the third-from-last pagination link (per the
    # original comment, the number of listing pages).
    print(page.xpath('//div[@class="pagination"]/a[last()-2]/text()'))
    rows = page.xpath('//table[@id="ip_list"]/tr[@class]')
    # Print how many proxies this page lists.
    print(len(rows))
    with open('ip.txt', 'w', encoding='utf-8') as f:
        for row in rows:
            # Look the cells up once per row instead of once per field.
            cells = row.cssselect('td')
            try:
                scheme_text = cells[5].text
                ip = cells[1].text
                port = cells[2].text
                scheme = scheme_text.lower()
            except (IndexError, AttributeError):
                # Too few cells or an empty protocol cell: skip this row
                # only, rather than abandoning the rest of the table.
                continue
            if ip is None or port is None:
                continue
            # Check whether the proxy is usable.
            code = test_ip(scheme, ip, port)
            if code == 200:
                print(scheme_text, ip, port)
                f.write(scheme + '://' + ip + ':' + port + "\n")
def test_ip(http, ip, port):
    '''
    Check that a proxy works and that the target sees the proxy's address.

    A GET request is sent to httpbin.org/ip through the proxy and the
    first address in the reported ``origin`` is compared with ``ip``.

    :param http: string, proxy scheme, ``http`` or ``https``
    :param ip: string, proxy IP address
    :param port: string, proxy port
    :return: int status code of the test request when the reported origin
        equals ``ip``; ``None`` when the request fails, the response cannot
        be parsed, or the origin differs
    '''
    proxies = {
        http: http + '://' + ip + ':' + port
    }
    print(proxies)
    # requests selects a proxy by the scheme of the *target* URL, so the
    # test URL must use the same scheme as the proxies key; otherwise an
    # ``https`` entry is ignored and the request goes out directly.
    target_scheme = 'https' if http == 'https' else 'http'
    test_url = target_scheme + '://httpbin.org/ip'
    try:
        response = requests.get(
            test_url,
            headers=get_random_header(),
            proxies=proxies,
            allow_redirects=False,
            timeout=2,
        )
        origin = json.loads(response.text)['origin'].split(', ')[0]
    except (requests.RequestException, ValueError, KeyError,
            TypeError, AttributeError):
        # Unreachable proxy, timeout, or a body that is not the expected
        # JSON object: treat the proxy as unusable.
        return None
    # Keep only proxies for which the reported origin is the proxy itself.
    if origin != ip:
        return None
    print(response.text)
    return response.status_code
# Script entry point: run the scraper against its default listing URL.
# NOTE: there is no __main__ guard, so this also runs when the module is imported.
high_concealment_agent()
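# Source article title: "python3 requests测试代理IP是否可用" (testing whether a proxy IP is usable with python3 requests)
# Web page footer: "最新推荐文章于 2024-04-13 02:17:18 发布" (latest recommended article published 2024-04-13 02:17:18)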