Python 的简单多线程实现用 dummy 模块一句话就可以搞定;但如果需要对线程、队列做进一步的操作,最好自己写个线程池类来实现。
Code:
# coding:utf-8
# version: 0.1
import re,time
from requests import get
from Queue import Queue, Empty
from threading import Thread
# Global counter of tasks taken off the queue by Spider.run().
# NOTE: it is incremented without a lock.
COUNT = 0


# Crawler worker
class Spider(Thread):
    """Worker thread that takes tasks off a queue until the queue is empty.

    The thread starts itself at the end of ``__init__``, so constructing
    a Spider is enough to begin consuming the queue.
    """

    def __init__(self, queue):
        """Remember the task queue and start the thread.

        Args:
            queue: object whose ``get(block=False)`` returns the next task
                and raises ``Empty`` when nothing is left.
        """
        Thread.__init__(self)
        self.queue = queue
        self.start()  # executes run() in the new thread

    def run(self):
        """Fetch one task per iteration; stop when the queue is empty.

        For every task fetched, the current value of the global COUNT is
        printed and COUNT is then incremented.
        """
        global COUNT
        while True:
            try:
                # Non-blocking fetch: an empty queue raises Empty instead
                # of making the thread wait.
                task = self.queue.get(block=False)  # not processed yet
            except Empty:
                break
            except Exception as e:
                print('[- Excpt :] ' + str(e))
                # Nothing was fetched, so do not count it. Stop instead of
                # retrying, because a fetch that keeps failing would make
                # this loop spin forever.
                break
            print(COUNT)
            COUNT += 1
# Thread pool
class ThreadPool(object):
    """Owns a task queue and the list of worker threads that consume it.

    Intended call order: ``add_task(...)``, then ``init_threads()``, then
    ``wait()``. Spider threads start as soon as they are constructed, so
    tasks should be queued before the workers are created.
    """

    def __init__(self, thread_num=0):
        """Create an empty pool.

        Args:
            thread_num: number of worker threads ``init_threads()`` will
                start. Defaults to 0, i.e. no workers.
        """
        self.thread_num = thread_num
        self.queue = Queue()  # tasks waiting to be executed
        self.threads = []  # worker threads created by init_threads()

    def add_task(self, *tasks):
        """Append each given task to the queue; no arguments is a no-op."""
        for task in tasks:
            self.queue.put(task)

    def init_threads(self):
        """Start ``thread_num`` Spider workers on the shared queue."""
        for _ in range(self.thread_num):
            # Spider starts itself in its constructor.
            self.threads.append(Spider(self.queue))

    def wait(self):
        """Block until every thread in ``self.threads`` has finished."""
        for t in self.threads:
            # is_alive() replaces isAlive(), which Python 3.9 removed.
            if t.is_alive():
                t.join()
if __name__ == '__main__':
    # Worker count handed to the pool. It was never defined before, which
    # raised NameError; 10 is an arbitrary starting value, adjust as needed.
    thread_num = 10
    start = time.time()
    tp = ThreadPool(thread_num)
    tp.wait()
    end = time.time()
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print('[ - info ] cost time :{}'.format(end - start))