很多小伙伴在获得了批量的代理IP后,对IP的清洗存在疑问,这篇就教你快速过滤代理IP。
import requests
def proxy_use(proxy_list=None, url="http://www.baidu.com", timeout=3):
    """Filter a pool of proxies down to the ones that can fetch ``url``.

    Each candidate is used for one GET request against ``url``; it is kept
    when the response status is 200 and dropped when the request raises or
    returns any other status.

    Args:
        proxy_list: Iterable of candidates in the ``{"protocol": "IP:port"}``
            format (protocol is usually ``http`` or ``https``). ``None``
            falls back to the built-in placeholder pool, which has to be
            replaced with real proxy addresses to be useful.
        url: Target page used for the check (Baidu by default).
        timeout: Seconds to wait for each request. It doubles as the quality
            threshold, so tune it to how fast the proxies need to be.

    Returns:
        A list with the candidate dicts (unchanged) that passed the check.
        The list is also printed, as before.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36"
    }

    if proxy_list is None:
        # Placeholder pool: swap in the real batch of proxy IPs.
        proxy_list = [
            {"https": "123.34.567.89:8888"},
            {"https": "123.34.567.89:8888"},
            {"https": "123.34.567.89:8888"},
            {"https": "123.34.567.89:8888"},
        ]

    # requests chooses the proxy by the scheme of the *target URL*. A
    # candidate keyed "https" is silently ignored for an "http://" URL, the
    # request goes out directly and every candidate looks healthy. Routing
    # the URL's own scheme through the candidate's address avoids that.
    scheme = url.partition("://")[0].lower()

    use_proxy = []
    for proxy in proxy_list:
        address = proxy.get(scheme) or next(iter(proxy.values()), None)
        if not address:
            # Nothing to test in an empty candidate.
            continue
        try:
            response = requests.get(
                url,
                headers=headers,
                proxies={scheme: address},
                timeout=timeout,
            )
        except Exception as e:
            # Deliberately broad: a dead, slow or malformed proxy must only
            # disqualify itself, never abort the whole filtering run.
            print(e)
            continue
        if response.status_code == 200:
            use_proxy.append(proxy)

    # Show which proxies met the requirements.
    print(use_proxy)
    return use_proxy
# Script entry point: run the proxy filter only when executed directly,
# not when this module is imported.
if "__main__" == __name__:
    proxy_use()