一. 第一种线程创建方式 (Method 1: creating threads by subclassing threading.Thread)
#coding=utf-8
#coding=utf-8 import requests import json import re import Queue import traceback import MySQLdb import cookielib import urllib2 import threading from common import * url = 'http://1212.ip138.com/ic.asp' user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)' headers = {'User-Agent': user_agent} time_now=time.strftime('%Y-%m-%d',time.localtime(time.time())) global tmdn_ip tmdn_ip=[] global wipo_ip wipo_ip=[] global b b=0 class IP: def __init__(self, ip_num=None, ip_place=None,web_site=None): self.ip_num=ip_num self.ip_place=ip_place self.web_site=web_site class IpStore: def __init__(self,): self.conn=MySQLdb.connect(host=MYSQL_HOST, user=MYSQL_USER, passwd=MYSQL_PASSWD, db=MYSQL_DB, charset='utf8',unix_socket=MYSQL_SOCKET) def insert_tmdn(self, use_ip): sql = 'insert into scb_crawler_wt_ip (ip,place,website) values (%s,%s,%s)' cursor = self.conn.cursor() cursor.execute(sql,(use_ip.ip_num,use_ip.ip_place,use_ip.web_site)) self.conn.commit() def select_tmdn(self): sql='select ip from scb_crawler_wt_ip WHERE website="tmdn"' cursor = self.conn.cursor() cursor.execute(sql) ips_out=cursor.fetchall() cursor.close() return ips_out def select(self): sql='select ip from scb_crawler_wt_ip' cursor = self.conn.cursor() cursor.execute(sql) ips_out=cursor.fetchall() cursor.close() return ips_out def select_wipo(self): sql='select ip from scb_crawler_wt_ip WHERE website="wipo"' cursor = self.conn.cursor() cursor.execute(sql) ips_out=cursor.fetchall() cursor.close() return ips_out def delete_ip(self, i_p): sql = 'delete from scb_crawler_wt_ip where ip="%s"' % i_p print sql cursor = self.conn.cursor() cursor.execute(sql) self.conn.commit() def close_con(self): self.conn.close() class myThread(threading.Thread): def __init__(self, ips): threading.Thread.__init__(self) self.ips = ips def run(self): global ips while True: if not ips.empty(): ip = ips.get() check_wipo(ip) check_tmdn(ip) else: break self.ips.task_done() def get_ips(): 
url='http://dev.kuaidaili.co...............................' req = requests.get(url).text req = json.loads(req) proxy_queue = Queue.Queue() proxy_list = req['data']['proxy_list'] for i in proxy_list: proxy_queue.put(i) return proxy_queue def check_tmdn(ip): global tmdn_ip global wipo_ip global b b+=1 print b proxie = { 'http': 'http://' + ip } #print ip try: req = requests.get(url, headers=headers, proxies=proxie).content.decode('gb2312') place_key = u'自\:(.+?)\<' place = re.findall(place_key, req)[0].strip() key = ip.split(':')[0] result = re.findall(key, req) if len(result) > 0 and place in [u'美国',u'荷兰']: print ip ip=IP(ip,place,'tmdn') tmdn_ip.append(ip) except Exception,e: #print traceback.format_exc() pass def check_wipo(ip): global tmdn_ip global wipo_ip global b b+=1 print b proxie = { 'http': 'http://' + ip } #print ip try: cookie = cookielib.CookieJar() handler = urllib2.HTTPCookieProcessor(cookie) #req = requests.get(url, headers=headers, proxies=proxie).content.decode('gb2312') proxy_handler = urllib2.ProxyHandler(proxie) opener = urllib2.build_opener(proxy_handler, handler) request = urllib2.Request(url, headers=headers) time_s = time.time() * 1000 res = opener.open(request, None, timeout=10) time_e = time.time() * 1000 if res.getcode()==200: ip = IP(ip, 'China', 'wipo') wipo_ip.append(ip) print '可用' html=res.read() print html except Exception,e: #print traceback.format_exc() pass if __name__=="__main__":threads=[] for i in range(30): ips=get_ips() thread=myThread(ips) threads.append(thread) for t in threads: t.daemon = True t.start() t.join() print '====================================================' print len(tmdn_ip) print '====================================================' ipstore = IpStore() for ele in tmdn_ip: try: ipstore.insert_tmdn(ele) except Exception,e: print traceback.format_exc() pass for ele in wipo_ip: try: ipstore.insert_tmdn(ele) except Exception, e: print traceback.format_exc() pass ipstore.close_con()
#coding=utf-8
# Method 2: worker threads created with threading.Thread(target=...).
# Fetches a purchased proxy list, verifies each proxy against the ip138
# echo page, and stores the working proxies in MySQL.
import requests
import json
import re
import sys
import Queue
import traceback
import MySQLdb
import threading

url = 'http://1212.ip138.com/ic.asp'
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)'
headers = {'User-Agent': user_agent}

useable_ip = []  # proxies that passed the echo-back check


class IpStore:
    """Minimal MySQL wrapper for storing working proxies in table `myip`."""

    def __init__(self):
        try:
            # FIX: dropped the unused `cursor` local the original created here.
            self.conn = MySQLdb.connect('localhost', user='root',
                                        passwd="******", db='test',
                                        charset='utf8')
        except Exception:
            traceback.print_exc()
            sys.exit()

    def insert_info(self, ip):
        """Insert one 'ip:port' string (parameterized query)."""
        sql = 'insert into myip (ip) values (%s)'
        cursor = self.conn.cursor()
        cursor.execute(sql, ip)
        self.conn.commit()


def get_ips():
    """Fetch the purchased proxy list; return a Queue of 'ip:port' strings."""
    url = 'http://dev.kuaidaili.com**************************************'  # purchased proxy API
    req = requests.get(url).text
    req = json.loads(req)
    proxy_queue = Queue.Queue()
    for i in req['data']['proxy_list']:
        proxy_queue.put(i)
    return proxy_queue


def check_ip():
    """Take one proxy off the shared queue and test it against ip138.

    A proxy is kept (appended to useable_ip) when ip138 echoes the proxy's
    own address back, proving the request really went through it.
    """
    global ips
    try:
        # FIX: the original used a blocking get(); with 40 threads and
        # possibly fewer proxies, surplus threads would hang forever.
        ip = ips.get_nowait()
    except Queue.Empty:
        return
    proxie = {'http': 'http://' + ip}
    try:
        req = requests.get(url, headers=headers, proxies=proxie).content.decode('gb2312')
        print(req)
        key = ip.split(':')[0]
        print(key)
        result = re.findall(key, req)
        if len(result) > 0:
            useable_ip.append(ip)
            print("匹配成功")
    except Exception:
        # Best effort: dead/slow proxies are expected, just skip them.
        pass


if __name__ == "__main__":
    ipstore = IpStore()
    ips = get_ips()
    print(type(ips))
    threads = []
    for i in range(40):
        t = threading.Thread(target=check_ip)
        # FIX: daemon must be set *before* start(); the original assigned it
        # after start(), which raises RuntimeError.  It also joined inside
        # the start loop, serializing the workers.
        t.daemon = True
        threads.append(t)
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    # FIX: implemented the database insert the original left as a TODO
    # ("插入数据库的操作没写" -- the insert step was never written).
    for ip in useable_ip:
        try:
            ipstore.insert_info(ip)
        except Exception:
            traceback.print_exc()