# Practice notes on ThreadPoolExecutor: the difference between map and submit; submit is recommended because it is more flexible.
import os
import random
import threading
import requests as rq
import time
from threading import Thread, Lock
from queue import Queue # 用于多线程之间线程安全的数据通信
from concurrent.futures import ThreadPoolExecutor, as_completed
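'''
Benefits of using a thread pool
1. Better performance: avoids the overhead of creating a large number of threads
2. Well suited to workloads that need many threads but where each task takes little time
3. The thread-pool syntax is more concise than creating and running threads by hand
################################################################
ThreadPoolExecutor---map
with ThreadPoolExecutor() as pool:
    results = pool.map(craw, urls)
    for result in results:
        print(result)
map is simple to use; it needs a fixed list of arguments (urls) prepared in advance
map returns results in the same order as the inputs, and all tasks are submitted in a single call
################################################################
ThreadPoolExecutor---submit
with ThreadPoolExecutor() as pool:
    futures = [pool.submit(craw, url) for url in urls]
    for future in futures:  # results come back in the same order as the inputs
        print(future.result())
        html_queue.put(future.result())
################################################################
ThreadPoolExecutor---submit + as_completed
with ThreadPoolExecutor() as pool:
    futures = [pool.submit(craw, url) for url in urls]
    for future in as_completed(futures):  # as_completed yields futures as they finish, so the order is not fixed
        print(future.result())
        html_queue.put(future.result())
'''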
# Crawl targets: one URL per page number, pages 1 through 10 inclusive.
url_list = [f"http://www.cnblogs.com/#p/{page_no}" for page_no in range(1, 11)]
def craw(url, timeout=10):
    """Fetch *url* over HTTP and return the length of the response text.

    Args:
        url: Address to request with ``requests.get``.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout unless one is passed explicitly, so without
            it a stalled server would block the caller indefinitely.

    Returns:
        The number of characters in the decoded response body.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` when the request fails or times out.
    """
    res = rq.get(url, timeout=timeout)
    return len(res.text)
########################################
# Queue usage
# producer-consumer pattern
def producer(urls, html_queue):
    """Crawl every URL on a thread pool and push each result onto a queue.

    One ``craw`` task is submitted per URL. The futures are then consumed in
    submission order, so results are printed and queued in the same order as
    ``urls`` regardless of which download finishes first.

    Args:
        urls: Iterable of URLs handed to ``craw``.
        html_queue: Queue-like object; each result is added with ``put``.
    """
    with ThreadPoolExecutor() as executor:
        pending = [executor.submit(craw, target) for target in urls]
        for task in pending:
            # result() blocks until this task is done; read it once and reuse.
            page_length = task.result()
            print(page_length)
            html_queue.put(page_length)
if __name__ == "__main__":
    # Script entry point: crawl the predefined URLs and collect the results
    # in a fresh queue.
    html_queue = Queue()
    producer(urls=url_list, html_queue=html_queue)