刷票、刷赞之类的批量操作都需要用到代理 IP,也就是 Python 爬虫 requests 中的 proxies 参数。本文检测了几个免费开放代理网站提供的代理 IP 的可用性。
import requests
import time
from bs4 import BeautifulSoup
# Verification target: icanhazip.com replies with the caller's public IP,
# so a successful fetch through a proxy confirms the proxy works.
url = 'http://icanhazip.com/'
# Running total of proxies that failed the check (shared by all sections below).
count = 0
# Minimal browser-like request header. HTTP header names are case-insensitive,
# but use the conventional 'User-Agent' capitalization (was 'user-Agent').
header = {
    "User-Agent": "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)"
}
# 芝麻HTTP (Zhima HTTP) free-proxy list: the list HTML is returned inside a
# JSON payload from a POST endpoint, one page per request; each extracted
# proxy is then tested against icanhazip.com.
post_page_url = 'http://wapi.http.linkudp.com/index/index/get_free_ip'
for page in range(1, 10):
    data = {
        'page': str(page)
    }
    # Renamed from `re` — that name shadows the stdlib regex module.
    resp = requests.post(post_page_url, headers=header, data=data)
    re_html = resp.json()['ret_data']['html']
    soup = BeautifulSoup(re_html, 'html.parser')
    rows = soup.find_all(class_='tr')
    for row in rows:
        cells = row.find_all('td')
        # The first cell carries a 'FREE' badge before the address; drop it.
        i_ip = (cells[0].text + ':' + cells[1].text).split('FREE')[1]
        try:
            # BUG FIX: the original passed str(ip) — the whole <tr> tag —
            # as the proxy address instead of the parsed host:port, and set
            # only an 'https' proxy for an http:// test URL, so the proxy
            # was never actually used. Register the proxy for both schemes.
            proxies = {'http': 'http://' + i_ip, 'https': 'http://' + i_ip}
            test = requests.get(url, headers=header, proxies=proxies, timeout=10)
            if test.status_code == 200:
                print(i_ip)
        except requests.RequestException:
            count = count + 1
            print(str(count) + '个不可用')
        time.sleep(1)
# 快代理 (Kuaidaili) free-proxy list: a plain HTML table, one page per URL;
# host is in the first <td>, port in the second.
get_ip_url_initial = 'https://www.kuaidaili.com/free/inha/'
for page in range(1, 15):
    get_ip_url = get_ip_url_initial + str(page) + '/'
    html = requests.get(get_ip_url, headers=header)
    html_text = BeautifulSoup(html.text, 'html.parser')
    IPS = html_text.find('tbody').find_all('tr')
    for IP in IPS:
        ip_text = IP.find_all('td')
        ip = ip_text[0].text + ':' + ip_text[1].text
        try:
            # 'http' proxy key matches the http:// test URL, so the request
            # really goes through the candidate proxy.
            test = requests.get(url, headers=header,
                                proxies={'http': 'http://' + str(ip)}, timeout=3)
            if test.status_code == 200:
                # icanhazip.com echoes the caller's IP — i.e. the proxy's.
                print(test.text)
        # Narrowed from a bare except: only network/HTTP failures mean the
        # proxy is unusable; other exceptions should surface as bugs.
        except requests.RequestException:
            count = count + 1
            print(str(count) + '个不可用')
        finally:
            time.sleep(1)
    print('第' + str(page) + '页完成')
# 齐云代理 (7yip) free-proxy list, pages 5-10 of the China listing; same
# table layout as Kuaidaili (host in td[0], port in td[1]).
get_ip_url_initial = 'https://www.7yip.cn/free/?action=china&page='
for page in range(5, 11):
    get_ip_url = get_ip_url_initial + str(page)
    get_ip = requests.get(get_ip_url, headers=header)
    get_ip_text = BeautifulSoup(get_ip.text, 'html.parser')
    IPS = get_ip_text.find('tbody').find_all('tr')
    for IP in IPS:
        cells = IP.find_all('td')
        ip = cells[0].text + ':' + cells[1].text
        try:
            # BUG FIX: the original scheme was 'https ://' (stray space — an
            # invalid proxy URL), and only an 'https' proxy was set for the
            # http:// test URL, so the proxy was never applied. Register the
            # proxy for both schemes.
            proxies = {'http': 'http://' + str(ip), 'https': 'http://' + str(ip)}
            test = requests.get(url, headers=header, proxies=proxies, timeout=3)
            if test.status_code == 200:
                # BUG FIX: icanhazip.com returns plain text, so the original
                # 'cf-footer-item' class lookup found nothing and the
                # resulting IndexError was swallowed by the bare except,
                # miscounting working proxies as dead. Print the body
                # directly, consistent with the Kuaidaili section.
                print(test.text)
        except requests.RequestException:
            count = count + 1
            print(str(count) + '个不可用')
        finally:
            time.sleep(1)
    print('第' + str(page) + '页完成')
免费代理的可用率通常很低;如果追求效率和稳定性,可以考虑购买付费代理 IP。