#!/usr/bin/env python
# coding: utf-8
import requests, urllib2, time
from lxml import etree
import time, threading
# Check proxy
def check_proxy(ip, port, timeout=10):
    """Check whether an HTTP proxy can fetch ``ip_check_url`` and record it.

    Builds a dedicated urllib2 opener that routes HTTP traffic through
    ``ip:port`` and requests the module-level ``ip_check_url``.  When the
    request succeeds, the ``ip:port`` address is printed and appended as one
    CRLF-terminated line to the module-level file ``f``; the write is
    serialised with the module-level ``lock``.

    Args:
        ip: Proxy host.
        port: Proxy port (formatted with ``%s``, so a string or a number).
        timeout: Seconds to wait for the proxied request.  The default
            matches the script's ``socket_timeout`` setting, which was
            defined but never applied to the request.

    Returns:
        True if the check URL was fetched through the proxy; False if any
        error occurred (the error is printed).
    """
    address = "%s:%s" % (ip, port)
    # Use a private opener instead of urllib2.install_opener(): the installed
    # opener is process-global, so concurrent checker threads would overwrite
    # each other's proxy and could test the wrong address.
    opener = urllib2.build_opener(urllib2.ProxyHandler({"http": address}))
    # NOTE: according to the original author, setting opener.addheaders with
    # a custom User-agent made detection fail for an unknown reason, so no
    # extra header is sent.
    try:
        # Bound the wait so a dead proxy cannot block its thread forever.
        conn = opener.open(urllib2.Request(ip_check_url), timeout=timeout)
        try:
            conn.read()
        finally:
            conn.close()
    except urllib2.HTTPError as e:
        print("ERROR: Code  %s" % (e.code,))
        return False
    except Exception as detail:
        print("ERROR:  %s" % (detail,))
        return False
    print(" WORKING: " + address)
    # The context manager releases the lock even if the write raises.
    with lock:
        f.write(address + "\r\n")
    # Brief pause carried over from the original implementation.
    time.sleep(0.1)
    return True
if __name__ == '__main__':
    # Scrape the proxy listing page, then test every listed proxy in its own
    # thread; working proxies are appended to ip_c.txt by check_proxy().
    url = "http://www.xicidaili.com/wt/"
    headers = {'content-type': 'text/html',
               'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0'}
    # Module-level settings and shared state read by check_proxy().
    ip_check_url = 'http://www.baidu.com/index.php'
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0'
    socket_timeout = 10
    lock = threading.Lock()
    # The with-block keeps the output file open until every worker thread has
    # been joined, then closes (and flushes) it even if an error occurs.
    with open("ip_c.txt", "a") as f:
        try:
            # Apply the timeout so an unresponsive listing site cannot hang
            # the script.
            r = requests.get(url, headers=headers, timeout=socket_timeout)
            r.raise_for_status()  # raise if the response status is an HTTP error
            tree = etree.HTML(r.content)
            ip_nodes = tree.xpath("//tr//td[2]/text()")
            port_nodes = tree.xpath("//tr//td[3]/text()")
            # Keep (ip, port) pairs rather than an ip -> port dict: a dict
            # would silently drop all but one port for an IP that is listed
            # several times.  The set still removes exact duplicates.
            proxies = set(zip(ip_nodes, port_nodes))
            threads = [threading.Thread(target=check_proxy, args=pair)
                       for pair in proxies]
            for t in threads:
                t.start()
            for t in threads:
                t.join()
        except requests.RequestException as e:
            print(e)