Python 过滤 MySQL 中可用的代理 IP

筛选出两个可用代理(2018.12.1 测试可用):

61.183.233.6:54896

221.224.136.211:35101

import requests
import pymysql

class Guolv_proxy(object):
    """Filter the proxies stored in the MySQL table ``xici_ip``.

    Each ``(ip, port)`` row is used as an HTTP/HTTPS proxy for one request
    to Baidu.  Proxies that answer correctly are appended to ``ip.txt``;
    all others are deleted from the table.
    """

    def __init__(self):
        # Keyword arguments: keyword-only PyMySQL releases reject the
        # positional form of connect().
        self.db = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='cyl666.',
            database='scrapy',
            charset='utf8',
        )
        self.cursor = self.db.cursor()

    def get_allproxy(self):
        """Return every ``(ip, port)`` row of the ``xici_ip`` table."""
        self.cursor.execute('select ip,port from xici_ip')
        return self.cursor.fetchall()

    def _is_usable(self, ip, port):
        """Return True when a request through ``ip:port`` yields the expected page."""
        base_url = 'https://www.baidu.com/s?wd=ip'
        address = 'http://%s:%s' % (ip, port)
        proxy = {'http': address, 'https': address}
        try:
            response = requests.get(base_url, proxies=proxy, timeout=10)
            return response.status_code == 200 and '本机IP' in response.text
        except Exception:
            # Deliberately broad: any failure while talking through the
            # proxy (timeout, refused connection, bad payload, ...) simply
            # means the proxy is not usable.
            return False

    def filter_proxy(self):
        """Test every stored proxy; record the good ones and delete the rest."""
        for url in self.get_allproxy():
            ip, port = url[0], url[1]
            if self._is_usable(ip, port):
                # The write happens outside the request's try/except so a
                # local file error can never get a working proxy deleted.
                # %-formatting also copes with a numeric port column.
                with open('ip.txt', 'a+', encoding='utf-8') as f:
                    f.write('%s:%s\n' % (ip, port))
                print(url, '可以使用')
            else:
                self.drop_proxy(ip, port)

    def drop_proxy(self, ip, port=None):
        """Delete a proxy from ``xici_ip``.

        Args:
            ip: Address of the proxy to remove.
            port: Optional port.  When given, only the matching ``ip``/``port``
                row is removed; when omitted, every row for ``ip`` is removed.

        Database errors are reported on stdout and not re-raised.
        """
        # Parameterized query: the driver escapes the values, so quotes in
        # the stored data cannot alter the statement.
        if port is None:
            sql, params = 'delete from xici_ip where ip=%s', (ip,)
        else:
            sql, params = 'delete from xici_ip where ip=%s and port=%s', (ip, port)
        try:
            self.cursor.execute(sql, params)
            self.db.commit()
            print(ip, '删除成功')
        except Exception as e:
            print('删除代理失败', e)

    def close(self):
        """Close the cursor and the database connection."""
        self.cursor.close()
        self.db.close()

if __name__ == '__main__':
    p = Guolv_proxy()
    try:
        p.filter_proxy()
    finally:
        # Release the cursor and the MySQL connection even if filtering fails.
        p.close()

协程版:

from gevent import monkey;monkey.patch_all()
from gevent.lock import Semaphore
import gevent
import requests
import pymysql
from queue import Queue


class Guolv_proxy(object):
    """Coroutine-based filter for the proxies stored in ``xici_ip``.

    All ``(ip, port)`` rows are loaded into a queue and drained by a pool
    of gevent greenlets.  Proxies that answer correctly are appended to
    ``ip.txt``; all others are deleted from the table.
    """

    def __init__(self):
        # Serializes use of the single shared cursor/connection.
        self.sem = Semaphore(1)

        # Keyword arguments: keyword-only PyMySQL releases reject the
        # positional form of connect().
        self.db = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='cyl666.',
            database='scrapy',
            charset='utf8',
        )
        self.cursor = self.db.cursor()
        self.proxy_q = Queue()

    def get_allproxy(self):
        """Load every ``(ip, port)`` row of ``xici_ip`` into the work queue."""
        self.cursor.execute('select ip,port from xici_ip')
        for px in self.cursor.fetchall():
            self.proxy_q.put(px)

    def _is_usable(self, ip, port):
        """Return True when a request through ``ip:port`` yields the expected page."""
        base_url = 'https://www.baidu.com/s?wd=ip'
        address = 'http://%s:%s' % (ip, port)
        proxy = {'http': address, 'https': address}
        try:
            response = requests.get(base_url, proxies=proxy, timeout=10)
            # 2xx only; 300 is a redirect status, not a success.
            return 200 <= response.status_code < 300 and '本机IP' in response.text
        except Exception:
            # Deliberately broad: any failure while talking through the
            # proxy simply means the proxy is not usable.
            return False

    def filter_proxy(self):
        """Worker loop: take proxies off the queue until it is empty."""
        from queue import Empty

        while True:
            # get_nowait() instead of "check empty(), then get()": another
            # greenlet may take the last item between the two calls, which
            # would leave a blocking get() waiting forever.
            try:
                url = self.proxy_q.get_nowait()
            except Empty:
                return
            ip, port = url[0], url[1]
            if self._is_usable(ip, port):
                # The write happens outside the request's try/except so a
                # local file error can never get a working proxy deleted.
                # %-formatting also copes with a numeric port column.
                with open('ip.txt', 'a+', encoding='utf-8') as f:
                    f.write('%s:%s\n' % (ip, port))
                print(url, '可以使用--------------------------')
            else:
                self.drop_proxy(ip, port)

    def drop_proxy(self, ip, port=None):
        """Delete a proxy from ``xici_ip`` while holding the semaphore.

        Args:
            ip: Address of the proxy to remove.
            port: Optional port.  When given, only the matching ``ip``/``port``
                row is removed; when omitted, every row for ``ip`` is removed.

        Database errors are reported on stdout and not re-raised.
        """
        # Parameterized query: the driver escapes the values, so quotes in
        # the stored data cannot alter the statement.
        if port is None:
            sql, params = 'delete from xici_ip where ip=%s', (ip,)
        else:
            sql, params = 'delete from xici_ip where ip=%s and port=%s', (ip, port)
        with self.sem:
            try:
                self.cursor.execute(sql, params)
                self.db.commit()
                print(ip, '删除成功')
            except Exception as e:
                print('删除代理失败:', e)

    def close(self):
        """Close the cursor and the database connection."""
        self.cursor.close()
        self.db.close()

    def main(self):
        """Fill the queue, then run 100 worker greenlets until it is drained."""
        self.get_allproxy()
        g_list = [gevent.spawn(self.filter_proxy) for _ in range(100)]
        # Block until every worker has finished.
        gevent.joinall(g_list)

if __name__ == '__main__':
    p = Guolv_proxy()
    try:
        p.main()
    finally:
        # Release the cursor and the MySQL connection even if filtering fails.
        p.close()

 

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值