一. 第一种线程创建方式（Method 1: create threads by subclassing threading.Thread）
#coding=utf-8
import cookielib
import json
import Queue
import re
import threading
import time
import traceback
import urllib2

import MySQLdb
import requests

from common import *
# --- shared configuration and result stores -------------------------------
# ip138 echo page: returns the caller's apparent IP and location.
url = 'http://1212.ip138.com/ic.asp'
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)'
headers = {'User-Agent': user_agent}
# Today's date, e.g. '2016-07-01' (not referenced below; kept for compatibility).
time_now = time.strftime('%Y-%m-%d', time.localtime(time.time()))
# Proxies that passed each site's check; worker threads append, main reads.
# NOTE: the original wrapped these in module-level ``global`` statements,
# which are no-ops at module scope and have been removed.
tmdn_ip = []
wipo_ip = []
# Progress counter bumped by every check call (not thread-safe; display only).
b = 0
class IP:
    """A proxy address together with its location label and the site
    (``'tmdn'`` or ``'wipo'``) it was validated against."""

    def __init__(self, ip_num=None, ip_place=None, web_site=None):
        # ip_num is a "host:port" string; ip_place is a human-readable label.
        self.ip_num = ip_num
        self.ip_place = ip_place
        self.web_site = web_site
class IpStore:
    """Thin MySQL persistence layer for validated proxy IPs
    (table ``scb_crawler_wt_ip``).

    Connection settings come from the project's ``common`` module
    (MYSQL_HOST etc.) — assumed to be star-imported at file top.
    """

    def __init__(self):
        self.conn = MySQLdb.connect(host=MYSQL_HOST, user=MYSQL_USER,
                                    passwd=MYSQL_PASSWD, db=MYSQL_DB,
                                    charset='utf8', unix_socket=MYSQL_SOCKET)

    def insert_tmdn(self, use_ip):
        """Insert one ``IP`` record. Despite the name it stores rows for any
        website (the original code used it for wipo rows too)."""
        sql = 'insert into scb_crawler_wt_ip (ip,place,website) values (%s,%s,%s)'
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql, (use_ip.ip_num, use_ip.ip_place, use_ip.web_site))
            self.conn.commit()
        finally:
            # Original leaked the cursor; always release it.
            cursor.close()

    def _select_ips(self, website=None):
        """Return all stored ip rows, optionally filtered by website.

        Consolidates the three near-identical select methods; the filter is
        parameterised rather than baked into the SQL string.
        """
        cursor = self.conn.cursor()
        try:
            if website is None:
                cursor.execute('select ip from scb_crawler_wt_ip')
            else:
                cursor.execute(
                    'select ip from scb_crawler_wt_ip WHERE website=%s',
                    (website,))
            return cursor.fetchall()
        finally:
            cursor.close()

    def select_tmdn(self):
        """Rows validated for tmdn."""
        return self._select_ips('tmdn')

    def select(self):
        """All stored rows."""
        return self._select_ips()

    def select_wipo(self):
        """Rows validated for wipo."""
        return self._select_ips('wipo')

    def delete_ip(self, i_p):
        """Delete the row for address ``i_p``.

        Parameterised: the original built the SQL with ``%`` interpolation,
        which is an injection risk and breaks on quotes in the value.
        """
        sql = 'delete from scb_crawler_wt_ip where ip=%s'
        print(sql)
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql, (i_p,))
            self.conn.commit()
        finally:
            cursor.close()

    def close_con(self):
        """Close the underlying connection."""
        self.conn.close()
class myThread(threading.Thread):
    """Worker that drains a queue of proxy addresses, running both site
    checks on each one."""

    def __init__(self, ips):
        threading.Thread.__init__(self)
        self.ips = ips

    def run(self):
        # Bug fixed: the original read the module-level global ``ips``
        # instead of ``self.ips``, making the constructor argument dead
        # state. It also raced between ``empty()`` and a blocking ``get()``;
        # ``get_nowait`` makes the drain atomic per item.
        while True:
            try:
                ip = self.ips.get_nowait()
            except Queue.Empty:
                break
            try:
                check_wipo(ip)
                check_tmdn(ip)
            finally:
                # Must be called once per get() for Queue.join() semantics
                # (the original called it only once, after the loop).
                self.ips.task_done()
def get_ips():
    """Fetch the purchased proxy list from the vendor API and return the
    addresses as a FIFO ``Queue.Queue``."""
    api_url = 'http://dev.kuaidaili.co...............................'
    payload = json.loads(requests.get(api_url).text)
    result = Queue.Queue()
    for proxy in payload['data']['proxy_list']:
        result.put(proxy)
    return result
def check_tmdn(ip):
    """Probe proxy ``ip`` ("host:port") for tmdn use: fetch the ip138 echo
    page through the proxy and keep the proxy (appended to ``tmdn_ip``) when
    the page echoes the proxy's own address and reports an accepted location.
    """
    global b
    b += 1
    print(b)  # progress counter (shared, unsynchronised — display only)
    proxie = {
        'http': 'http://' + ip
    }
    try:
        req = requests.get(url, headers=headers, proxies=proxie).content.decode('gb2312')
        # ip138 reports "...自:<place><...": capture the location label.
        place_key = u'自\:(.+?)\<'
        places = re.findall(place_key, req)
        if not places:
            return  # page did not contain the expected marker
        place = places[0].strip()
        # The page must echo the proxy's own host part back to us.
        key = ip.split(':')[0]
        if re.findall(key, req) and place in [u'美国', u'荷兰']:
            print(ip)
            tmdn_ip.append(IP(ip, place, 'tmdn'))
    except Exception:
        # Best-effort probe: a failing/slow proxy is simply not recorded.
        # (Fixed Python-3-incompatible ``except Exception,e`` syntax.)
        pass
def check_wipo(ip):
    """Probe proxy ``ip`` ("host:port") via urllib2 with a cookie jar; on an
    HTTP 200 response record it (appended to ``wipo_ip``) as a wipo
    candidate and dump the page body."""
    global b
    b += 1
    print(b)  # progress counter (shared, unsynchronised — display only)
    proxie = {
        'http': 'http://' + ip
    }
    try:
        cookie = cookielib.CookieJar()
        cookie_handler = urllib2.HTTPCookieProcessor(cookie)
        proxy_handler = urllib2.ProxyHandler(proxie)
        opener = urllib2.build_opener(proxy_handler, cookie_handler)
        request = urllib2.Request(url, headers=headers)
        # (Removed unused latency measurement from the original.)
        res = opener.open(request, None, timeout=10)
        if res.getcode() == 200:
            wipo_ip.append(IP(ip, 'China', 'wipo'))
            print('可用')
            print(res.read())
    except Exception:
        # Best-effort probe: any network error disqualifies this proxy.
        # (Fixed Python-3-incompatible ``except Exception,e`` syntax.)
        pass
if __name__ == "__main__":
    # Each worker gets its own freshly fetched proxy queue.
    threads = []
    for i in range(30):
        ips = get_ips()
        threads.append(myThread(ips))
    # Bug fixed: the original called start() and then join() inside the SAME
    # loop, so every thread finished before the next one started — fully
    # serial execution. Start them all first, then wait for them all.
    for t in threads:
        t.daemon = True
        t.start()
    for t in threads:
        t.join()
    print('====================================================')
    print(len(tmdn_ip))
    print('====================================================')
    ipstore = IpStore()
    # insert_tmdn stores rows for either website (the original used it for
    # both lists too, in two identical loops).
    for ele in tmdn_ip + wipo_ip:
        try:
            ipstore.insert_tmdn(ele)
        except Exception:
            print(traceback.format_exc())
    ipstore.close_con()
二. 第二种线程创建方式（Method 2: create threads with threading.Thread(target=...)）
#coding=utf-8
import requests
import json
import re
import sys
import Queue
import traceback
import MySQLdb
import threading
# --- shared configuration and result store --------------------------------
# ip138 echo page: returns the caller's apparent IP and location.
url = 'http://1212.ip138.com/ic.asp'
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)'
headers = {'User-Agent': user_agent}
# Proxies that echoed their own address back (appended by worker threads).
# NOTE: removed the stray ``global tmdn_ip`` — it is a no-op at module
# scope and ``tmdn_ip`` is never used in this script.
useable_ip = []
class IpStore:
    """Minimal MySQL store for working proxy addresses (table ``myip``)."""

    def __init__(self):
        try:
            # Password redacted in the original source.
            self.conn = MySQLdb.connect('localhost', user='root',
                                        passwd="******", db='test',
                                        charset='utf8')
        except Exception:
            # Without a DB connection the script is useless: report and quit.
            # (Narrowed the original bare ``except:``; also dropped the
            # cursor that was opened here and never used.)
            traceback.print_exc()
            sys.exit()

    def insert_info(self, ip):
        """Insert one proxy address into ``myip``."""
        sql = 'insert into myip (ip) values (%s)'
        cursor = self.conn.cursor()
        try:
            # Pass a tuple, not a bare string, as the parameter sequence.
            cursor.execute(sql, (ip,))
            self.conn.commit()
        finally:
            cursor.close()
def get_ips():
    """Download the purchased proxy list and return it as a FIFO queue."""
    # Purchased proxy API endpoint (redacted in the original source).
    api_url = 'http://dev.kuaidaili.com**************************************'
    data = json.loads(requests.get(api_url).text)
    result = Queue.Queue()
    for proxy in data['data']['proxy_list']:
        result.put(proxy)
    return result
def check_ip():
    """Take one proxy from the shared global queue ``ips`` and record it in
    ``useable_ip`` when ip138, fetched through the proxy, echoes the proxy's
    own address."""
    global ips
    try:
        # Bug fixed: a blocking ``get()`` hung forever whenever there were
        # more worker threads (40) than queued proxies.
        ip = ips.get_nowait()
    except Queue.Empty:
        return  # queue exhausted — nothing for this worker to do
    proxie = {
        'http': 'http://' + ip
    }
    try:
        req = requests.get(url, headers=headers, proxies=proxie).content.decode('gb2312')
        print(req)
        # The page must echo the proxy's own host part back to us.
        key = ip.split(':')[0]
        print(key)
        if re.findall(key, req):
            useable_ip.append(ip)
            print("匹配成功")
    except Exception:
        # Best-effort probe: a failing proxy is simply skipped.
        # (Narrowed the original bare ``except:``.)
        pass
if __name__ == "__main__":
    ipstore = IpStore()
    ips = get_ips()  # shared queue read by every check_ip worker
    print(type(ips))
    threads = []
    for i in range(40):
        t = threading.Thread(target=check_ip)
        # Bug fixed: daemon must be set BEFORE start(); the original set it
        # after, which raises RuntimeError on an active thread.
        t.daemon = True
        threads.append(t)
    # Start all workers first, then join — the original joined inside the
    # start loop, which serialised the threads.
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    # TODO: persisting ``useable_ip`` via ipstore.insert_info() was never
    # implemented in the original ("插入数据库的操作没写").