一、函数:
# 面向过程
# t = threading.Thread(target=xxx, name=xxx, args=(xx, xx))
# target: 线程启动后要执行的函数
# name: 线程的名字
# threading.current_thread().name: 获取线程的名字
# args: 主线程向子线程传递的参数
# t.start():启动线程t
# t.join():让主线程等待t线程结束后再结束
# threading.enumerate():返回当前仍存活的线程列表;注意列表长度总是大于0,因为主线程也在其中
# pip install -i https://pypi.douban.com/simple lxml
# 多线程爬虫:
# 分析:
# 两类线程:下载,解析
# 线程之间的交互:内容队列:下载线程往队列中put数据,解析线程从队列中get数据
# url队列:下载线程从url队列中get数据
# 写数据:上锁(本地磁盘化储存) lock.acquire()
# 解锁:lock.release()
# 注意前面要先打开文件,然后传输文件的fp,才能使用上锁和解锁
二、示例:
例:
import threading
import time
from queue import Queue
# Module-level registry of worker threads: create_thread() appends to it,
# and main() starts and then joins every entry.
du_list = []
class PlayThread(threading.Thread):
    """Worker thread that drains a shared queue, printing each item it takes.

    Args:
        name: Thread name, shown in the start and finish messages.
        play_queue: Queue of items to consume; may be shared by several workers.
        delay: Seconds to pause after handling each item. Defaults to 1.
    """

    def __init__(self, name, play_queue, delay=1):
        super(PlayThread, self).__init__()
        self.name = name
        self.play_queue = play_queue
        self.delay = delay

    def run(self):
        """Consume items until the queue is empty, then announce completion."""
        # Imported locally because the module only imports Queue from queue.
        from queue import Empty

        print('%s启动' % self.name)
        while True:
            # Checking empty() and then calling a blocking get() is racy:
            # another worker can take the last item between the two calls,
            # leaving this thread blocked forever. A non-blocking get makes
            # the emptiness test and the removal a single step.
            try:
                play = self.play_queue.get_nowait()
            except Empty:
                break
            print(play)
            # Pause after each item; per-item work would go here.
            time.sleep(self.delay)
        print('%s结束' % self.name)
def create_queue():
    """Return a new Queue pre-loaded with the integers 1 through 10, in order."""
    first, last = 1, 10
    tasks = Queue()
    for number in range(first, last + 1):
        tasks.put(number)
    return tasks
def create_thread(play_queue):
    """Build the three named workers and register them in the module-level du_list.

    The threads are only constructed here; starting them is left to the caller.

    Args:
        play_queue: Queue handed to every worker.
    """
    for label in ('线程1号', '线程2号', '线程3号'):
        du_list.append(PlayThread(label, play_queue))
def main():
    """Fill the queue, create the workers, run them all, and wait for them to finish."""
    create_thread(create_queue())
    # Start every worker before joining any, so they run concurrently.
    for worker in du_list:
        worker.start()
    # Block until each worker has returned from run().
    for worker in du_list:
        worker.join()
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()