筛选出两个可用代理(2018.12.1 测试可用):
61.183.233.6:54896
221.224.136.211:35101
import requests
import pymysql
class Guolv_proxy(object):
    """Validate the proxies stored in the ``xici_ip`` table and prune dead ones.

    Every ``(ip, port)`` row is probed by fetching a Baidu "ip" search page
    through the proxy. Proxies that pass are appended to ``ip.txt``; the
    others are deleted from the table.
    """

    def __init__(self, host='127.0.0.1', user='root', password='cyl666.',
                 database='scrapy', charset='utf8'):
        """Open the MySQL connection and a cursor on it.

        The defaults reproduce the previously hard-coded settings, so
        ``Guolv_proxy()`` keeps working. Keyword arguments are used because
        PyMySQL 1.0+ made ``connect()`` keyword-only.
        """
        # NOTE(review): the default password is a credential committed to
        # source; prefer supplying it from configuration or the environment.
        self.db = pymysql.connect(host=host, user=user, password=password,
                                  database=database, charset=charset)
        self.cursor = self.db.cursor()

    def get_allproxy(self):
        """Return all ``(ip, port)`` rows of ``xici_ip`` as given by ``fetchall()``."""
        self.cursor.execute('select ip,port from xici_ip')
        return self.cursor.fetchall()

    def filter_proxy(self):
        """Probe every stored proxy; record usable ones and delete the rest."""
        base_url = 'https://www.baidu.com/s?wd=ip'
        for row in self.get_allproxy():
            ip, port = row[0], row[1]
            if self._is_usable(base_url, ip, port):
                # The write happens outside any try block on purpose: a file
                # error must not cause a working proxy to be deleted.
                with open('ip.txt', 'a+', encoding='utf-8') as f:
                    # %-formatting also copes with a numeric port column.
                    f.write('%s:%s\n' % (ip, port))
                print(row, '可以使用')
            else:
                self.drop_proxy(ip, port)

    def _is_usable(self, base_url, ip, port):
        """Return True when ``base_url`` fetched through the proxy looks valid."""
        address = 'http://%s:%s' % (ip, port)
        proxy = {'http': address, 'https': address}
        try:
            response = requests.get(base_url, proxies=proxy, timeout=10)
        except Exception:
            # Deliberately broad: whatever goes wrong while talking through
            # the proxy (refused, timeout, malformed address, ...) just means
            # the proxy is not usable.
            return False
        return response.status_code == 200 and '本机IP' in response.text

    def drop_proxy(self, ip, port=None):
        """Delete a proxy from ``xici_ip``.

        With only ``ip`` every row for that address is removed (the original
        behaviour). When ``port`` is given, only rows matching both ``ip``
        and ``port`` are removed. Failures are printed, not raised.
        """
        # Parameters are bound by the driver instead of being interpolated
        # into the SQL text, so quoting in ``ip`` cannot alter the statement.
        if port is None:
            sql, args = 'delete from xici_ip where ip=%s', (ip,)
        else:
            sql, args = 'delete from xici_ip where ip=%s and port=%s', (ip, port)
        try:
            self.cursor.execute(sql, args)
            self.db.commit()
            print(ip, '删除成功')
        except Exception as e:
            print('删除代理失败', e)

    def close(self):
        """Close the cursor and then the database connection."""
        self.cursor.close()
        self.db.close()
if __name__ == '__main__':
    # Script entry point: filter all stored proxies once.
    p = Guolv_proxy()
    try:
        p.filter_proxy()
    finally:
        # Release the cursor and connection even if filtering is interrupted.
        p.close()
协程版:
from gevent import monkey;monkey.patch_all()
from gevent.lock import Semaphore
import gevent
import requests
import pymysql
from queue import Queue
class Guolv_proxy(object):
    """Coroutine (gevent) version of the ``xici_ip`` proxy validator.

    Rows of ``xici_ip`` are loaded into a queue and consumed by several
    greenlets. Each proxy is probed by fetching a Baidu "ip" search page
    through it; usable proxies are appended to ``ip.txt`` and the others are
    deleted from the table.
    """

    def __init__(self, host='127.0.0.1', user='root', password='cyl666.',
                 database='scrapy', charset='utf8'):
        """Open the MySQL connection and create the work queue.

        The defaults reproduce the previously hard-coded settings, so
        ``Guolv_proxy()`` keeps working. Keyword arguments are used because
        PyMySQL 1.0+ made ``connect()`` keyword-only.
        """
        # Guards the delete + commit in drop_proxy, since all greenlets share
        # the one connection and cursor below.
        self.sem = Semaphore(1)
        # NOTE(review): the default password is a credential committed to
        # source; prefer supplying it from configuration or the environment.
        self.db = pymysql.connect(host=host, user=user, password=password,
                                  database=database, charset=charset)
        self.cursor = self.db.cursor()
        self.proxy_q = Queue()

    def get_allproxy(self):
        """Load every ``(ip, port)`` row of ``xici_ip`` into ``self.proxy_q``."""
        self.cursor.execute('select ip,port from xici_ip')
        for px in self.cursor.fetchall():
            self.proxy_q.put(px)

    def filter_proxy(self):
        """Worker loop: take proxies off the queue until it is empty.

        Usable proxies are appended to ``ip.txt``; the others are deleted.
        """
        from queue import Empty

        base_url = 'https://www.baidu.com/s?wd=ip'
        while True:
            # get_nowait() instead of "if not empty(): get()": the separate
            # check can go stale when several workers share the queue, and a
            # blocking get() on a drained queue would never return.
            try:
                row = self.proxy_q.get_nowait()
            except Empty:
                return
            ip, port = row[0], row[1]
            if self._is_usable(base_url, ip, port):
                # The write happens outside any try block on purpose: a file
                # error must not cause a working proxy to be deleted.
                with open('ip.txt', 'a+', encoding='utf-8') as f:
                    # %-formatting also copes with a numeric port column.
                    f.write('%s:%s\n' % (ip, port))
                print(row, '可以使用--------------------------')
            else:
                self.drop_proxy(ip, port)

    def _is_usable(self, base_url, ip, port):
        """Return True when ``base_url`` fetched through the proxy looks valid."""
        address = 'http://%s:%s' % (ip, port)
        proxy = {'http': address, 'https': address}
        try:
            response = requests.get(base_url, proxies=proxy, timeout=10)
        except Exception:
            # Deliberately broad: whatever goes wrong while talking through
            # the proxy (refused, timeout, malformed address, ...) just means
            # the proxy is not usable.
            return False
        # Only 2xx counts as success; 300 is a redirect status, not success.
        return 200 <= response.status_code < 300 and '本机IP' in response.text

    def drop_proxy(self, ip, port=None):
        """Delete a proxy from ``xici_ip`` while holding ``self.sem``.

        With only ``ip`` every row for that address is removed (the original
        behaviour). When ``port`` is given, only rows matching both ``ip``
        and ``port`` are removed. Failures are printed, not raised.
        """
        # Parameters are bound by the driver instead of being interpolated
        # into the SQL text, so quoting in ``ip`` cannot alter the statement.
        if port is None:
            sql, args = 'delete from xici_ip where ip=%s', (ip,)
        else:
            sql, args = 'delete from xici_ip where ip=%s and port=%s', (ip, port)
        with self.sem:
            try:
                self.cursor.execute(sql, args)
                self.db.commit()
                print(ip, '删除成功')
            except Exception as e:
                print('删除代理失败:', e)

    def close(self):
        """Close the cursor and then the database connection."""
        self.cursor.close()
        self.db.close()

    def main(self, workers=100):
        """Fill the queue, then run ``workers`` greenlets until all finish."""
        self.get_allproxy()
        g_list = [gevent.spawn(self.filter_proxy) for _ in range(workers)]
        gevent.joinall(g_list)
if __name__ == '__main__':
    # Script entry point: run the coroutine-based filter once.
    p = Guolv_proxy()
    try:
        p.main()
    finally:
        # Release the cursor and connection even if the run is interrupted.
        p.close()