一.Join操作
import threading
import time
class MyThread(threading.Thread):
    """Thread that sleeps for a while and then prints its identifier.

    Used to demonstrate ``Thread.join()``: the delay makes it obvious
    whether the caller waited for the thread before carrying on.
    """

    def __init__(self, id, delay=10):
        """Store the identifier and the sleep duration.

        Args:
            id: Value printed by ``run``. The name shadows the builtin only
                inside this method and is kept for caller compatibility.
            delay: Seconds ``run`` sleeps before printing. Defaults to 10,
                the value that used to be hard-coded.
        """
        threading.Thread.__init__(self)
        self.id = id
        self.delay = delay

    def run(self):
        """Sleep for ``self.delay`` seconds, then print ``self.id``."""
        time.sleep(self.delay)
        # A parenthesised single-argument print produces the same output
        # as a Python 2 statement and as a Python 3 function call.
        print(self.id)
def func():
    """Print the integers 0 through 4, one per line."""
    for i in range(5):
        # Single-argument print(...) is valid on both Python 2 and 3.
        print(i)
# Create and start the worker thread; its run() sleeps and then prints its id.
t=MyThread(2)
t.start()
# join() blocks the main thread until t has finished, so func() below only
# starts printing after the worker's output has appeared.
t.join()
func()
1.如果没有t.join(),主线程不会等待子线程,会先把func执行完,直接输出0,1,2,3,4(range(5)不包含5),约10秒后子线程再输出2
2.如果有t.join,先等待10秒输出2,再执行func
二.Gevent
import gevent
from gevent import monkey
monkey.patch_all()
import requests
import re
# Download the listing page and collect every movie link found in it.
url = 'http://www.vpsgo.com/top-100/'
# .text is the decoded body, so the text pattern below can search it on both
# Python 2 and Python 3 (.content is raw bytes on Python 3).
htmltext = requests.get(url).text
# The closing quote sits outside the capture group so that extracted URLs do
# not end with a stray '"'; the dots are escaped to match literally.
pattern = re.compile(r'(http://www\.vpsgo\.com/movies/[^"\n]*)"')
links = re.findall(pattern, htmltext)
def worker(link, num):
    """Request ``link`` through a short-lived session and report on stdout.

    Args:
        link: URL to fetch; the response is discarded.
        num: Index of the link, used only in the progress message.
    """
    # The context manager closes the session once the request has returned.
    with requests.Session() as s:
        s.get(link)
    # Pre-formatted single-argument prints give the same output on
    # Python 2 and Python 3.
    print('requesting url number {0}'.format(num))
    print('done')
    print('=' * 25)
# Spawn one greenlet per link. The job list is built exactly once, so
# joinall() waits for every greenlet (not just the most recent one) and the
# name is defined even when no links were found.
jobs = [gevent.spawn(worker, link, num) for num, link in enumerate(links)]
gevent.joinall(jobs)
# NOTE(review): gevent.shutdown() is believed to exist only in old gevent
# releases (later renamed to gevent.wait()) -- confirm against the installed
# version. All jobs have already finished by this point.
gevent.shutdown()
三.爬虫
from threading import Thread
import urllib
import re
def th(ur):
    """Fetch the Yahoo Finance quote page for a symbol and print the matches.

    Args:
        ur: Ticker symbol, e.g. ``"BIOC"``. It is appended to the query URL
            and, lower-cased, embedded in the ``<span id=...>`` pattern.

    Prints a line containing the symbol and the list of captured strings
    (an empty list when the pattern does not occur in the page).
    """
    # Local import so the function works on both Python 3 and Python 2
    # without touching the file-level import block.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    base = "http://finance.yahoo.com/q?s=" + ur
    # NOTE(review): the id prefix "yfs_184_" may be a typo for "yfs_l84_"
    # (letter l) -- confirm against the actual page markup.
    # re.escape keeps symbols containing '.' or '-' from being read as regex.
    regex = '<span id="yfs_184_' + re.escape(ur.lower()) + '">(.+?)</span>'
    pattern = re.compile(regex)

    response = urlopen(base)
    try:
        htmltext = response.read()
    finally:
        # The response was previously never closed.
        response.close()
    # Python 3 returns bytes; decode so the text pattern can search it.
    if not isinstance(htmltext, str):
        htmltext = htmltext.decode("utf-8", "replace")

    results = re.findall(pattern, htmltext)
    # The original message lacked the space before "is".
    print("the price of " + str(ur) + " is " + str(results))
#symbolslist=open("symbols.txt").read()
#symbolslist=symbolslist.replace(" ","").split(",")
#print symbolslist
# Ticker symbols to look up, one worker thread per symbol.
symbolslist = ["BIOC", "ACCL"]

# Start every lookup immediately and remember the thread objects.
threadlist = []
for symbol in symbolslist:
    fetcher = Thread(target=th, args=(symbol,))
    fetcher.start()
    threadlist.append(fetcher)

# Wait until all lookups have finished.
for running in threadlist:
    running.join()