python多线程示例

import threading,queue,time

class Thread(threading.Thread):
    def __init__(self,name,q):
        threading.Thread.__init__(self)
        self.name=name
        self.q=q
    def run(self):
        #print('Starting '+self.name)
        while self.q.qsize()>0:
            print('\r%d/%d'%(lst_length-self.q.qsize(),lst_length),end='') #展示进度
            getData(self.q)
        #print('Ending '+self.name)
def getData(q):
    qu=q.get()
    time.sleep(1)
    result=(qu,qu**2) #结果可能会乱序,所以此处为元组
    results_list.append(result)
def buildQueueAndThreads(lst,threadNum):
    workQueue=queue.Queue()
    [workQueue.put(i) for i in lst] #构建队列
    threads_list=[]
    for i in range(1,threadNum+1):
        thread=Thread('Thread-'+str(i),q=workQueue)
        thread.start()
        threads_list.append(thread)
    print('\nWaiting for finishing...')
    for thread in threads_list: #等待子线程运行完再回到主线程
        thread.join()
threadNum=100
lst=list(range(500))
lst_length=len(lst)
start_time=time.time()
results_list=[]
buildQueueAndThreads(lst,threadNum)
print('\n%d 线程处理耗时: %.fs'%(threadNum,time.time()-start_time))
print('result_list length: ',len(results_list))
print(results_list[:5]) #顺序可能会乱,结果为元组的列表
99/500
Waiting for finishing...
499/500
100 线程处理耗时: 5s
result_list length:  500
[(1, 1), (0, 0), (3, 9), (2, 4), (4, 16)]

如果在函数中应用多线程,可以这样写:(Python threading实现多线程 基础篇 - 知乎

import threading,queue,time
 
class Thread(threading.Thread):
    def __init__(self,name,q):
        threading.Thread.__init__(self)
        self.name=name
        self.q=q
        self.sub_result_list=[]
    def run(self):
        while self.q.qsize()>0:
            self.sub_result_list.append(getData(self.q))
    def get_sub_result_list(self):
        return self.sub_result_list
def getData(q):
    qu=q.get()
    time.sleep(1)
    result=(qu,qu**2) #结果可能会乱序,所以此处为元组
    return result #results_list.append(result)
def buildQueueAndThreadsAndGetResults(lst,threadNum):
    workQueue=queue.Queue()
    [workQueue.put(i) for i in lst] #构建队列
    threads_list=[]
    for i in range(1,threadNum+1):
        thread=Thread('Thread-'+str(i),q=workQueue)
        thread.start()
        threads_list.append(thread)
    print('\nWaiting for finishing...')
    for thread in threads_list: #等待子线程运行完再回到主线程
        thread.join()
    results_list=[]
    for t in threads_list:
        results_list.extend(t.get_sub_result_list())
    return results_list
def f():
    threadNum=10
    lst=list(range(20))
    lst_length=len(lst)
    start_time=time.time()
    results_list=buildQueueAndThreadsAndGetResults(lst,threadNum)
    print('\n%d 线程处理耗时: %.fs'%(threadNum,time.time()-start_time))
    print('result_list length: ',len(results_list))
    print(results_list[:5]) #顺序可能会乱,结果为元组的列表
if __name__=="__main__":
    f()
Waiting for finishing...

10 线程处理耗时: 2s
result_list length:  20
[(0, 0), (10, 100), (1, 1), (11, 121), (2, 4)] 

有时爬虫程序设置较多线程时,频繁地访问会被服务器拒绝,我们希望为每个线程分配一定数量的任务,当任务数量(尝试爬取次数)达到时即自动结束,这样虽然不能保证数据获取完全,但会随着任务数量的减少,线程数自动减少,从而降低访问频率。未获取完全的部分数据可通过多次运行来实现:

#多线程示例
import threading,queue,time
 
class Thread(threading.Thread):
    def __init__(self,name,q):
        threading.Thread.__init__(self)
        self.name=name
        self.q=q
        self.count=0
    def run(self):
        #print('Starting '+self.name)
        while self.q.qsize()>0 and self.count<10 and continue_all_threads:
            self.count+=1
            print('\r%d/%d continue all threads:%s'%(lst_length-self.q.qsize(),lst_length,continue_all_threads),end='') #展示进度
        #print('Ending '+self.name)
def getData(q):
    qu=q.get()
    time.sleep(1)
    result=(qu,qu**2) #结果可能会乱序,所以此处为元组
    results_list.append(result)
def buildQueueAndThreads(lst,threadNum):
    workQueue=queue.Queue()
    [workQueue.put(i) for i in lst] #构建队列
    threads_list=[]
    for i in range(1,threadNum+1):
        thread=Thread('Thread-'+str(i),q=workQueue)
        time.sleep(0.1)#延时0.1s,否则进度输出可能不是所期望的结果
        thread.start()
        threads_list.append(thread)
    print('\nWaiting for finishing...')
    #for thread in threads_list: #不等子线程结束再进行主线程,主线程与子线程同时进行
    #    thread.join()    
    alive_num=threadNum
    lst_length=len(lst)
    last_workQueue_size=workQueue.qsize()
    last_time=time.time()    
    while alive_num>0:
        alive_num=len([t for t in threads_list if t.is_alive()])
        workQueue_size=workQueue.qsize()
        if workQueue_size<last_workQueue_size:
            last_workQueue_size=workQueue_size
            last_time=time.time()
        delta_time=time.time()-last_time
        if delta_time>5: #如果超过5s,队列中的任务数量没有减少,则爬取失败,跳出循环。线程函数非爬虫程序,所以此处情况不会发生
            break
        print('\rAliving num:%d Progress:%d/%d'%(alive_num,lst_length-workQueue.qsize(),lst_length),end='')
    global continue_all_threads #此处要声明为全局变量
    continue_all_threads=False
threadNum=6
lst=list(range(56))
lst_length=len(lst)
start_time=time.time()
print()
results_list=[]
continue_all_threads=True
buildQueueAndThreads(lst,threadNum)
print('\n%d 线程处理耗时: %.1fs'%(threadNum,time.time()-start_time))
print('result_list length: ',len(results_list))
print(results_list[:5]) #顺序可能会乱,结果为元组的列表
5/56 continue all threads:True
Waiting for finishing...
Aliving num:0 Progress:56/56rue
6 线程处理耗时: 10.4s
result_list length:  56
[(0, 0), (1, 1), (2, 4), (3, 9), (4, 16)]
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值