__author__ = 'multiangle'
__edition__='python3.4'import threading
import urllib.request as request
import time
rawProxyList=[]
checkedProxyList=[]
class proxycheck(threading.Thread):
    """Worker thread that validates HTTP proxies against a test page.

    Each thread receives a slice of the raw proxy list. A proxy is
    considered usable when fetching ``testurl`` through it succeeds
    within ``timeout`` seconds AND the response contains ``testStr``
    (guards against bad proxies that return an error page with HTTP
    200). Usable proxies are appended to the module-global
    ``checkedProxyList`` as ``(proxy, elapsed_seconds)`` tuples.
    """

    def __init__(self, proxy_list):
        """proxy_list: iterable of "host:port" proxy strings to test."""
        threading.Thread.__init__(self)
        self.proxy_list = proxy_list
        self.timeout = 3                      # per-request timeout, seconds
        self.testurl = 'http://www.sina.cn/'
        # Marker expected in a genuine response from the test page.
        self.testStr = '手机新浪网'

    def checkproxy(self):
        """Try every proxy in self.proxy_list; record the working ones.

        Failures (timeouts, connection errors, decode errors) are
        printed and skipped — this is a deliberate best-effort scan.
        """
        cookies = request.HTTPCookieProcessor()
        for proxy in self.proxy_list:
            handler = request.ProxyHandler({'http': 'http://%s' % (proxy)})
            opener = request.build_opener(cookies, handler)
            t1 = time.time()
            try:
                req = opener.open(self.testurl, timeout=self.timeout)
                res = req.read()
                res = str(res, encoding='utf8')
                usetime = time.time() - t1
                if self.testStr in res:
                    # NOTE(review): list.append is atomic under CPython's
                    # GIL, so concurrent appends from threads are safe here.
                    checkedProxyList.append((proxy, usetime))
            except Exception as e:
                print(e)

    def run(self):
        self.checkproxy()
if __name__ == '__main__':
    thread_num = 10                  # number of checker threads to spawn
    checkThrends = []
    url = 'YOUR PROXY URL'           # the site that serves the raw proxy list
    req = request.urlopen(url).read()
    req = str(req, encoding='utf-8')
    # The provider returns plain text, one "host:port" proxy per line,
    # separated by '\r\n'.
    rawProxyList = req.split('\r\n')
    print('get raw proxy')
    for proxy in rawProxyList:
        print(proxy)
    # Ceiling division so every proxy lands in exactly one thread's slice.
    batch_size = (len(rawProxyList) + thread_num - 1) // thread_num
    print(batch_size)
    for i in range(thread_num):
        t = proxycheck(rawProxyList[batch_size * i:batch_size * (i + 1)])
        checkThrends.append(t)
    for t in checkThrends:
        t.start()
    for t in checkThrends:
        t.join()
    print(len(checkedProxyList), ' useful proxy is find')
    for item in checkedProxyList:
        print(item)