Attributes
Technology:
- threading
- fake-useragent
Usage:
Even if you obtain an IP from this program's output, free proxy IPs are difficult to keep stable.
domestic
import requests
import random
import time
from lxml import etree
from fake_useragent import UserAgent
import re
import threading
def get_proxy_lists(pages=99):
    """Scrape free HTTP proxies from 89ip.cn.

    Args:
        pages: number of listing pages to scrape (default 99, matching
            the original hard-coded ``range(1, 100)``).

    Returns:
        list[str]: candidate proxies formatted as "ip:port".
    """
    proxy_lists = []
    for page_index in range(1, pages + 1):
        # URL of one page of the free-proxy listing site.
        ip_url = 'https://www.89ip.cn/index_{}.html'.format(page_index)
        # Fresh random User-Agent per request to reduce blocking.
        headers = {'User-Agent': UserAgent(verify_ssl=False).random}
        try:
            # Timeout so one dead page cannot hang the whole scrape.
            html = requests.get(url=ip_url, headers=headers, timeout=10).text
        except requests.RequestException:
            # Skip unreachable pages instead of aborting the entire run.
            continue
        elemt = etree.HTML(html)
        # Column 1 = IP, column 2 = port in the site's results table.
        ips_list = elemt.xpath('//table/tbody/tr/td[1]/text()')
        ports_list = elemt.xpath('//table/tbody/tr/td[2]/text()')
        for ip, port in zip(ips_list, ports_list):
            # Join ip and port into "ip:port".
            proxy_lists.append(ip.strip() + ":" + port.strip())
    return proxy_lists
def ip_pool_foreign(proxy, ip_pool_foreign_lists, test_url='http://httpbin.org/get'):
    """Probe one proxy and append it to the shared list if it works.

    Args:
        proxy: candidate proxy as an "ip:port" string.
        ip_pool_foreign_lists: shared result list; working proxies are
            appended with a trailing newline (ready for ``writelines``).
            ``list.append`` is atomic, so concurrent worker threads are safe.
        test_url: URL used to verify the proxy (default: httpbin echo).
    """
    headers = {'User-Agent': UserAgent(verify_ssl=False).random}
    # proxies = {'scheme': 'scheme://IP:port'}
    proxies = {
        'http': 'http://{}'.format(proxy),
        'https': 'https://{}'.format(proxy),
    }
    try:
        resp = requests.get(
            url=test_url,
            headers=headers,
            proxies=proxies,
            timeout=3)  # 3-second timeout: slow proxies raise and are dropped
    except requests.RequestException:
        # Dead/slow proxy: drop it silently (best-effort filter by design).
        return
    if resp.status_code == 200:
        ip_pool_foreign_lists.append(proxy + '\n')
if __name__ == '__main__':
    # Collect candidate proxies, then verify each one in its own thread.
    threads = []
    ip_pool_foreign_lists = []  # shared result list (list.append is thread-safe)
    proxy_lists = get_proxy_lists()
    print(40 * '-', 'proxy_lists over.', 40 * '-')
    for proxy in proxy_lists:
        t = threading.Thread(target=ip_pool_foreign,
                             args=(proxy, ip_pool_foreign_lists))
        threads.append(t)
    print(40 * '-', 'append over.', 40 * '-')
    for t in threads:
        # Thread.setDaemon() is deprecated; assign the attribute instead.
        t.daemon = True
        t.start()
    print(40 * '-', 'start over.', 40 * '-')
    for t in threads:
        t.join()
    print(40 * '-', 'join over.', 40 * '-')
    # '[%d]' (the original '[{%d}]' mixed f-string braces into %-formatting).
    print(40 * '-', '[%d]' % len(ip_pool_foreign_lists), 40 * '-')
    for ip in ip_pool_foreign_lists:
        print(40 * '-', ip, 40 * '-')
    # Persist the verified proxies, one per line.
    with open(r'ip_pool_home.txt', 'w', encoding="utf-8") as fp:
        fp.writelines(ip_pool_foreign_lists)
foreign
import requests
import random
import time
from lxml import etree
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
import re
import threading
def get_proxy_lists(a=1, b=500):
    """Scrape free proxies from 66ip.cn pages ``[a, b)``.

    Args:
        a: first page index (inclusive).
        b: last page index (exclusive).

    Returns:
        list[str]: candidate proxies formatted as "ip:port".
    """
    proxy_lists = []
    for page_index in range(a, b):
        # Progress indicator; end='' lets the \r overwrite a single line.
        print('\r{}/{}'.format(page_index, b - a), end='', flush=True)
        # URL of one page of the free-proxy listing site.
        ip_url = 'http://www.66ip.cn/{}.html'.format(page_index)
        # Fresh random User-Agent per request to reduce blocking.
        headers = {'User-Agent': UserAgent(verify_ssl=False).random}
        try:
            # Timeout so one dead page cannot hang the whole scrape.
            html = requests.get(url=ip_url, headers=headers, timeout=10).text
        except requests.RequestException:
            # Skip unreachable pages instead of aborting the entire run.
            continue
        soup = BeautifulSoup(html, 'lxml')
        # First <td> of each row is the IP; [1:] skips the table header row.
        ips_list = re.findall('<tr><td>(.*?)</td><td>', str(soup))[1:]
        ports_list = re.findall(r'</td><td>(\d*?)</td><td>', str(soup))
        for ip, port in zip(ips_list, ports_list):
            # Join ip and port into "ip:port".
            proxy_lists.append(ip.strip() + ":" + port.strip())
        # Throttle: pause up to 5 seconds every 10 pages.
        if page_index % 10 == 0:
            time.sleep(random.randint(0, 5))
    return proxy_lists
def ip_pool_foreign(proxy, ip_pool_foreign_lists, test_url='https://www.pixiv.net/'):
    """Probe one proxy and append it to the shared list if it works.

    Args:
        proxy: candidate proxy as an "ip:port" string.
        ip_pool_foreign_lists: shared result list; working proxies are
            appended with a trailing newline (ready for ``writelines``).
            ``list.append`` is atomic, so concurrent worker threads are safe.
        test_url: URL used to verify the proxy (default: pixiv front page).
    """
    headers = {'User-Agent': UserAgent(verify_ssl=False).random}
    # proxies = {'scheme': 'scheme://IP:port'}
    proxies = {
        'http': 'http://{}'.format(proxy),
        'https': 'https://{}'.format(proxy),
    }
    try:
        resp = requests.get(
            url=test_url,
            headers=headers,
            proxies=proxies,
            timeout=3)  # 3-second timeout: slow proxies raise and are dropped
    except requests.RequestException:
        # Dead/slow proxy: drop it silently (best-effort filter by design).
        return
    if resp.status_code == 200:
        ip_pool_foreign_lists.append(proxy + '\n')
if __name__ == '__main__':
    # Collect candidate proxies, then verify each one in its own thread.
    threads = []
    ip_pool_foreign_lists = []  # shared result list (list.append is thread-safe)
    proxy_lists = get_proxy_lists()
    print(40 * '-', 'proxy_lists over.', 40 * '-')
    for proxy in proxy_lists:
        t = threading.Thread(target=ip_pool_foreign,
                             args=(proxy, ip_pool_foreign_lists))
        threads.append(t)
    print(40 * '-', 'append over.', 40 * '-')
    for t in threads:
        # Thread.setDaemon() is deprecated; assign the attribute instead.
        t.daemon = True
        t.start()
    print(40 * '-', 'start over.', 40 * '-')
    for t in threads:
        t.join()
    print(40 * '-', 'join over.', 40 * '-')
    # '[%d]' (the original '[{%d}]' mixed f-string braces into %-formatting).
    print(40 * '-', '[%d]' % len(ip_pool_foreign_lists), 40 * '-')
    for ip in ip_pool_foreign_lists:
        print(40 * '-', ip, 40 * '-')
    # Persist the verified proxies, one per line.
    with open(r'Git\pixiv_download\ip_pool_foreign.txt', 'w', encoding="utf-8") as fp:
        fp.writelines(ip_pool_foreign_lists)