python多线程验证ip

一.第一种线程创建方式

#coding=utf-8
#coding=utf-8
import requests
import json
import re
import Queue
import traceback
import MySQLdb
import cookielib
import urllib2
import threading
from common import *

# Echo page that reports the caller's apparent IP address; every proxy is
# validated by fetching this URL through it and checking what it echoes back.
url = 'http://1212.ip138.com/ic.asp'
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)'
headers = {'User-Agent': user_agent}
# NOTE(review): `time` is not imported in this file directly — presumably it is
# re-exported by `from common import *`; confirm. time_now is unused below.
time_now=time.strftime('%Y-%m-%d',time.localtime(time.time()))


# Shared result containers filled by the worker threads.
# NOTE(review): `global` at module level is a no-op; these statements only
# signal intent and have no effect.
global tmdn_ip
tmdn_ip=[]   # IP records that passed the tmdn (US/Netherlands geolocation) check
global wipo_ip
wipo_ip=[]   # IP records that passed the wipo (plain reachability) check
global b
b=0          # progress counter, bumped once per check_* call (unsynchronized)

class IP:
    """Value object for one validated proxy: address, location, target site."""

    def __init__(self, ip_num=None, ip_place=None, web_site=None):
        # Plain data holder — store the three fields exactly as passed.
        self.web_site = web_site
        self.ip_place = ip_place
        self.ip_num = ip_num

class IpStore:
    """MySQL-backed store for validated proxies (table scb_crawler_wt_ip).

    Connection settings (MYSQL_HOST, MYSQL_USER, ...) are provided by
    `from common import *`.
    """

    def __init__(self):
        self.conn = MySQLdb.connect(host=MYSQL_HOST, user=MYSQL_USER, passwd=MYSQL_PASSWD,
                                    db=MYSQL_DB, charset='utf8', unix_socket=MYSQL_SOCKET)

    def insert_tmdn(self, use_ip):
        """Insert one IP record (generic insert: the record carries its own website tag)."""
        sql = 'insert into scb_crawler_wt_ip (ip,place,website) values (%s,%s,%s)'
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql, (use_ip.ip_num, use_ip.ip_place, use_ip.web_site))
            self.conn.commit()
        finally:
            # The original never closed write cursors.
            cursor.close()

    def _select_ips(self, website=None):
        # Shared, parameterized implementation behind the three select_* methods.
        if website is None:
            sql = 'select ip from scb_crawler_wt_ip'
            args = None
        else:
            sql = 'select ip from scb_crawler_wt_ip where website=%s'
            args = (website,)
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql, args)
            return cursor.fetchall()
        finally:
            cursor.close()

    def select_tmdn(self):
        """Return all stored 'tmdn' proxy rows."""
        return self._select_ips('tmdn')

    def select(self):
        """Return all stored proxy rows regardless of website."""
        return self._select_ips()

    def select_wipo(self):
        """Return all stored 'wipo' proxy rows."""
        return self._select_ips('wipo')

    def delete_ip(self, i_p):
        """Delete every row matching the given proxy string.

        Uses a parameterized query: the original interpolated i_p straight
        into the SQL text, which is injection-prone.
        """
        sql = 'delete from scb_crawler_wt_ip where ip=%s'
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql, (i_p,))
            self.conn.commit()
        finally:
            cursor.close()

    def close_con(self):
        """Close the underlying MySQL connection."""
        self.conn.close()

class myThread(threading.Thread):
    """Worker thread that drains its own queue of "host:port" proxy strings,
    running both checks on each one."""

    def __init__(self, ips):
        threading.Thread.__init__(self)
        # Queue.Queue of proxy strings assigned to this worker.
        self.ips = ips

    def run(self):
        # Drain this thread's own queue. The original read the module-level
        # global `ips` here but called task_done() on self.ips — since main
        # builds one queue per thread, that mixed two different queues.
        while True:
            try:
                # The original's empty()/get() pair is racy; a non-blocking
                # get with Queue.Empty as the exit signal is atomic.
                ip = self.ips.get_nowait()
            except Queue.Empty:
                break
            try:
                check_wipo(ip)
                check_tmdn(ip)
            finally:
                self.ips.task_done()


def get_ips():
    """Fetch the purchased proxy list and return it as a Queue of "host:port" strings."""
    api_url = 'http://dev.kuaidaili.co...............................'
    payload = json.loads(requests.get(api_url).text)
    queue = Queue.Queue()
    for proxy in payload['data']['proxy_list']:
        queue.put(proxy)
    return queue


def check_tmdn(ip):
    """Check one proxy ("host:port" string) against the ip138 echo page.

    The page is fetched through the proxy; if it echoes the proxy's own host
    and geolocates it in the US or the Netherlands, the proxy is wrapped in an
    IP record tagged 'tmdn' and appended to the shared tmdn_ip list. Any
    failure (timeout, bad proxy, decode or regex miss) is silently ignored.
    """
    global tmdn_ip
    global wipo_ip
    global b
    # NOTE(review): unsynchronized read-modify-write of a shared counter from
    # many threads; it is only used for progress printing, so losses are benign.
    b+=1
    print b
    proxie = {
        'http': 'http://' + ip
    }
    #print ip
    try:
        # ip138 serves gb2312-encoded HTML.
        req = requests.get(url, headers=headers, proxies=proxie).content.decode('gb2312')
        # Capture the location text that follows "自:" on the echo page.
        place_key = u'自\:(.+?)\<'
        place = re.findall(place_key, req)[0].strip()
        # The page must echo the proxy's own host part, proving the request
        # really went out through the proxy rather than directly.
        key = ip.split(':')[0]
        result = re.findall(key, req)
        # The two accepted place literals are "United States" and "Netherlands".
        if len(result) > 0 and place in [u'美国',u'荷兰']:
            print ip
            ip=IP(ip,place,'tmdn')
            tmdn_ip.append(ip)
    except Exception,e:
        # Best-effort probe: failed proxies are simply dropped.
        #print traceback.format_exc()
        pass


def check_wipo(ip):
    """Probe one proxy via urllib2 (with cookie support); record it on HTTP 200.

    NOTE(review): despite the name, this fetches the module-level `url`
    (the ip138 echo page), not a WIPO endpoint, and the place is hard-coded
    to 'China'. Confirm whether that is intended.
    """
    global tmdn_ip
    global wipo_ip
    global b
    # NOTE(review): same unsynchronized shared counter as in check_tmdn.
    b+=1
    print b
    proxie = {
        'http': 'http://' + ip
    }
    #print ip
    try:
        # Cookie-aware opener routed through the proxy under test.
        cookie = cookielib.CookieJar()
        handler = urllib2.HTTPCookieProcessor(cookie)
        #req = requests.get(url, headers=headers, proxies=proxie).content.decode('gb2312')
        proxy_handler = urllib2.ProxyHandler(proxie)
        opener = urllib2.build_opener(proxy_handler, handler)
        request = urllib2.Request(url, headers=headers)
        # NOTE(review): time_s/time_e look like an abandoned latency
        # measurement — neither value is ever used.
        time_s = time.time() * 1000
        res = opener.open(request, None, timeout=10)
        time_e = time.time() * 1000
        if res.getcode()==200:
            # Any successful fetch counts; param `ip` is rebound to the record.
            ip = IP(ip, 'China', 'wipo')
            wipo_ip.append(ip)
            print '可用'
            html=res.read()
            print html
    except Exception,e:
        # Best-effort probe: failed proxies are simply dropped.
        #print traceback.format_exc()
        pass


if __name__=="__main__":
    threads=[]
    for i in range(30):
        ips=get_ips()
        thread=myThread(ips)
        threads.append(thread)
    for t in threads:
        t.daemon = True
        t.start()
    t.join()
    print '===================================================='
    print len(tmdn_ip)
    print '===================================================='
    ipstore = IpStore()
    for ele in tmdn_ip:
        try:
            ipstore.insert_tmdn(ele)
        except Exception,e:
            print traceback.format_exc()
            pass
    for ele in wipo_ip:
        try:
            ipstore.insert_tmdn(ele)
        except Exception, e:
            print traceback.format_exc()
            pass
    ipstore.close_con()




二.第二种线程创建方式

#coding=utf-8
import requests
import json
import re
import sys
import Queue
import traceback
import MySQLdb
import threading

# Echo page that reports the caller's apparent IP; a proxy is considered
# usable when this page, fetched through it, echoes the proxy's own address.
url='http://1212.ip138.com/ic.asp'
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)'
headers = {'User-Agent': user_agent}

# NOTE(review): `global` at module level is a no-op, and tmdn_ip is never
# defined in this script — the list actually used is useable_ip below.
global tmdn_ip
useable_ip=[]  # proxies that passed the echo check, filled by worker threads


class IpStore:
    """Minimal MySQL store recording working proxies in table `myip`."""

    def __init__(self):
        try:
            self.conn = MySQLdb.connect('localhost', user='root', passwd="******", db='test',
                                        charset='utf8')
        except Exception:
            # Narrowed from the original bare `except:` so SystemExit and
            # KeyboardInterrupt are not swallowed; keeps the fail-fast exit.
            # (The original also created an unused cursor here; removed.)
            traceback.print_exc()
            sys.exit()

    def insert_info(self, ip):
        """Insert one proxy string into `myip`."""
        sql = 'insert into myip (ip) values (%s)'
        cursor = self.conn.cursor()
        try:
            # Pass execute() params as a tuple; the original passed the bare
            # string, relying on driver-specific leniency.
            cursor.execute(sql, (ip,))
            self.conn.commit()
        finally:
            cursor.close()

def get_ips():
    """Fetch the purchased proxy list and return it as a Queue of "host:port" strings."""
    api_url = 'http://dev.kuaidaili.com**************************************'  # purchased proxy API link
    payload = json.loads(requests.get(api_url).text)
    queue = Queue.Queue()
    for proxy in payload['data']['proxy_list']:
        queue.put(proxy)
    return queue

def check_ip():
    """Take one proxy off the shared queue and record it if the echo page shows its IP.

    Appends to the module-level useable_ip list on success; failures are
    silently skipped (best-effort probing).
    """
    global ips
    # NOTE(review): blocking get() — hangs forever if there are more worker
    # threads than proxies in the queue.
    ip=ips.get()
    proxie={
        'http':'http://'+ip
    }
    try:
        req=requests.get(url,headers=headers,proxies=proxie).content.decode('gb2312')
        print(req)
        # The echo page must contain the proxy's own host part, proving the
        # request actually went out through the proxy.
        key=ip.split(':')[0]
        print(key)
        result=re.findall(key,req)
        if len(result)>0:
            # Original line was indented with a tab + spaces (a TabError under
            # Python 3 and misaligned under `python -tt`); normalized to spaces.
            useable_ip.append(ip)
            print("匹配成功")
    except Exception:
        # Narrowed from bare `except:` so Ctrl-C is not swallowed; a failing
        # proxy is simply skipped.
        pass
if __name__=="__main__":
    ipstore=IpStore()
    # One shared queue of proxies, drained by 40 worker threads
    # (check_ip takes exactly one proxy per thread).
    ips=get_ips()
    print(type(ips))
    threads=[]
    for i in range(40):
        t=threading.Thread(target=check_ip)
        threads.append(t)
    for t in threads:
        # daemon must be set BEFORE start(); the original set it after, which
        # raises RuntimeError ("cannot set daemon status of active thread")
        # on the very first iteration.
        t.daemon = True
        t.start()
    # As in the original, only the last-created thread is joined.
    # NOTE(review): joining all threads would be stricter, but check_ip blocks
    # forever on ips.get() when the queue holds fewer than 40 proxies, so a
    # join-all could hang; daemon threads die with the main thread instead.
    t.join()
    #插入数据库的操作没写 -> the DB-insert step (ipstore.insert_info) was never written







  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值