使用 Python 验证代理网站上公布的代理服务器是否可用
实验时找到一个实时公布代理服务器的网站:http://www.xicidaili.com/nn/
代码如下:
import urllib2
import socket
from bs4 import BeautifulSoup
import os
def IsOpen(ip, port, timeout=5.0):
    """Return True if a TCP connection to ``ip:port`` can be established.

    Args:
        ip: Host name or IPv4 address to connect to.
        port: TCP port; any value ``int()`` accepts (e.g. ``8080`` or ``"8080"``).
        timeout: Seconds to wait on the socket operations before giving up.
            ``None`` keeps the socket in blocking mode.

    Returns:
        True when both the connect and the following shutdown succeed;
        False on any socket error or when ``port`` is not a usable port.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Without a timeout an unresponsive host can stall the caller for a
        # long time before the operating system gives up on the connect.
        s.settimeout(timeout)
        s.connect((ip, int(port)))
        s.shutdown(socket.SHUT_RDWR)
        return True
    except (socket.error, ValueError, TypeError, OverflowError):
        # socket.error covers refused/timed-out/unresolvable hosts; the
        # others cover a port that is not numeric or is out of range.
        return False
    finally:
        # Always release the descriptor. Python 2 sockets are not context
        # managers, hence the explicit close instead of a ``with`` block.
        s.close()
# Fetch the proxy listing page, sending a browser-like User-Agent header.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0'}
url = 'http://www.xicidaili.com/nn/'
hurl = urllib2.Request(url, headers=header)
html_doc = urllib2.urlopen(hurl).read()

# Parse the page and collect every row of the table whose id is "ip_list".
soup = BeautifulSoup(html_doc, features="lxml")
trs = soup.find('table', id='ip_list').find_all('tr')

# The first row is skipped (presumably the table header -- confirm against
# the page markup).
for tr in trs[1:]:
    tds = tr.find_all('td')
    # Cells 1, 2 and 5 are read below; skip rows that do not have them
    # instead of aborting the whole scan with an IndexError.
    if len(tds) < 6:
        continue
    ip = tds[1].text.strip()
    port = tds[2].text.strip()
    protocol = tds[5].text.strip()
    if protocol not in ('HTTP', 'HTTPS'):
        continue
    print(ip)
    if IsOpen(ip, port):
        print(ip + ":" + port + " is OK")
        # Append each hit immediately so earlier results are kept if the
        # run is interrupted.
        with open("yes.txt", "a") as ava:
            # Text mode already translates "\n" to the platform line ending;
            # writing os.linesep here would produce "\r\r\n" on Windows.
            ava.write(ip + ":" + port + "\n")