# - 获取66ip的代理信息 (fetch proxy candidates from 66ip.cn)
# - 创建进程池异步检测代理是否可用 (use a process pool to asynchronously check which proxies work)
import multiprocessing
import queue
import time

import requests
from lxml import etree
def check_proxy(proxy):
    """Probe *proxy* with one HTTP request and report whether it works.

    Args:
        proxy: proxy URL string, e.g. ``'http://1.2.3.4:8080'``.

    Returns:
        The proxy string if the request succeeded with HTTP 200,
        otherwise ``None``.
    """
    url = 'https://www.baidu.com/s?wd=ip'
    # NOTE(review): only an 'http' proxy entry is configured while the probe
    # URL is https — requests will not route the https request through it.
    # Kept as-is to preserve the original probe; confirm intent with the author.
    proxy_dict = {
        'http': proxy,
    }
    try:
        # Narrow except: the original bare `except:` swallowed every
        # exception, including KeyboardInterrupt.
        response = requests.get(url, proxies=proxy_dict, timeout=5)
    except requests.exceptions.RequestException:
        print('此ip不可用!')
        return None
    if response.status_code == 200:
        print('此ip可用!', proxy)
        return proxy
    # Bug fix: the original printed a "usable" message and still returned the
    # proxy on a non-200 reply, so dead proxies ended up in the valid list.
    print('此ip不可用!状态码:', response.status_code)
    return None
def get_all_proxy_list(queue):
    """Scrape the 66ip.cn proxy table and push proxy URLs onto *queue*.

    Each entry put on the queue is a string of the form ``'http://ip:port'``.
    Intended to run as a producer in a separate process.

    Args:
        queue: a ``multiprocessing.Queue`` to receive proxy strings.

    Raises:
        requests.exceptions.RequestException: on network failure/timeout
        or a non-2xx HTTP status from the listing page.
    """
    url = 'http://www.66ip.cn/areaindex_35/1.html'
    # Fix: the original call had no timeout and could hang the producer
    # process forever; also fail loudly on an HTTP error page instead of
    # silently parsing it.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    html_ele = etree.HTML(response.text)
    tr_list = html_ele.xpath('//div[@align="center"]/table/tr')
    # Skip the first row — it is the table header.
    for tr in tr_list[1:]:
        ip = tr.xpath('./td[1]/text()')[0]
        port = tr.xpath('./td[2]/text()')[0]
        queue.put('http://' + ip + ':' + port)
if __name__ == '__main__':
    # Producer: a separate process scrapes proxies and feeds the queue.
    q = multiprocessing.Queue()
    p = multiprocessing.Process(target=get_all_proxy_list, args=(q,))
    p.start()

    # Consumers: a pool of workers probes each proxy asynchronously.
    pool = multiprocessing.Pool(10)
    result_list = []
    while True:
        try:
            # Treat 5s of queue silence as "producer is done".
            proxy_str = q.get(timeout=5)
        except queue.Empty:
            # Fix: the original bare `except:` also swallowed
            # KeyboardInterrupt/SystemExit; only the queue timeout
            # should end the loop.
            break
        result_list.append(pool.apply_async(check_proxy, args=(proxy_str,)))

    # Collect the workers' verdicts; check_proxy returns None for dead proxies.
    valid_proxy_list = []
    for proxy_res in result_list:
        result = proxy_res.get()
        if result:
            valid_proxy_list.append(result)
    # Typo fix in the user-facing message ('vaild' -> 'valid').
    print('All valid proxies we can get')
    print(valid_proxy_list)

    pool.close()
    pool.join()
    p.join()