import time
import threading
from queue import Queue
class Multithreading:
    """Three-stage producer/consumer pipeline built on threads and bounded queues.

    One producer thread enqueues page numbers, a pool of detail workers turns
    each page number into a detail string (after a simulated I/O delay), and a
    pool of save workers prints every detail string.
    """

    def __init__(self, page_count=1001, detail_workers=10, save_workers=5,
                 io_delay=0.1, queue_size=100):
        """Create the two bounded queues and store the pipeline settings.

        Args:
            page_count: Number of list pages to produce (pages ``0`` through
                ``page_count - 1``).
            detail_workers: Number of threads running :meth:`get_detail`.
            save_workers: Number of threads running :meth:`save`.
            io_delay: Seconds each detail fetch sleeps to simulate I/O latency.
            queue_size: Maximum number of pending items held by each queue.

        Raises:
            ValueError: If either worker count is below 1, because
                :meth:`run` would then wait forever on a queue nobody drains.
        """
        if detail_workers < 1 or save_workers < 1:
            raise ValueError(
                "detail_workers and save_workers must be at least 1"
            )
        self.page_count = page_count
        self.detail_workers = detail_workers
        self.save_workers = save_workers
        self.io_delay = io_delay
        # Bounded queues give back-pressure: put() blocks while a queue is full.
        self.list_page_queue = Queue(maxsize=queue_size)
        self.detail_page_queue = Queue(maxsize=queue_size)

    def get_list(self):
        """Produce every list-page number and enqueue it for the detail stage."""
        for page in range(self.page_count):
            self.list_page_queue.put(page)

    def get_detail(self):
        """Consume page numbers forever and enqueue a detail string for each.

        Never returns; intended to run in a daemon thread.
        """
        while True:
            i = self.list_page_queue.get()
            ret = f'第{i}页的详情页'
            time.sleep(self.io_delay)  # simulated I/O latency
            # Hand the result on BEFORE marking the page done, so that once
            # list_page_queue.join() returns every detail is already queued.
            self.detail_page_queue.put(ret)
            self.list_page_queue.task_done()

    def save(self):
        """Consume detail strings forever and print each one.

        Never returns; intended to run in a daemon thread.
        """
        while True:
            ret = self.detail_page_queue.get()
            print(f'成功保存{ret}')
            self.detail_page_queue.task_done()

    def run(self):
        """Start all worker threads and block until every page has been saved."""
        # Daemon threads are killed when the main thread exits, which is what
        # lets the endless consumer loops terminate with the program.
        producers = [threading.Thread(target=self.get_list, daemon=True)]
        consumers = [
            threading.Thread(target=self.get_detail, daemon=True)
            for _ in range(self.detail_workers)
        ]
        consumers += [
            threading.Thread(target=self.save, daemon=True)
            for _ in range(self.save_workers)
        ]

        for worker in producers + consumers:
            worker.start()

        # Wait for the producer first. Queue.join() returns as soon as the
        # unfinished-task count is zero, which is also true before the first
        # put() and can be true between two put() calls; joining the queues
        # alone could therefore end the program with work still outstanding.
        for producer in producers:
            producer.join()

        # Drain in pipeline order: once the list queue is fully processed, all
        # detail items have been enqueued, so the detail queue can be awaited.
        for pending in (self.list_page_queue, self.detail_page_queue):
            pending.join()
        print("任务完成主线程结束!")
if __name__ == '__main__':
    # Script entry point: build the pipeline and run it until all work is done.
    Multithreading().run()