Table of Contents
对于任务量较大的程序,可以考虑多线程和多进程来节省时间
简单来看,多进程更适合纯计算(CPU密集型)程序,多线程适用于频繁IO切换(IO密集型)的程序
另外使用jupyter同时运行多个程序也是一种办法
不过需要注意,jupyter对多线程和多进程都不太友好
多线程
import threading
import time
# One print call, one value per line (sep='\n'), in the same order as before.
print(
    threading.active_count(),    # number of Thread objects currently alive
    threading.enumerate(),       # list of the Thread objects currently alive
    threading.current_thread(),  # Thread object of the thread making this call
    sep='\n',
)
新添加一个线程
def job_():
    """Print a fixed message; used as the target of the demo thread below."""
    print('这是一个线程')


# Create a thread whose target is job_ and start it. Nothing here waits for
# the thread to finish (there is no join).
add_th = threading.Thread(target=job_)
add_th.start()
join
在没有加入join之前,多线程的任务和它之后的程序是同时运行的,没有先后关系
如下所示,打印的顺序是T1先开始,然后在T1执行的过程中,另一个任务完成(打印all done),
如果我们想要另一个任务在T1之后完成,就要使用join
# Work executed on the worker thread
def thread_job(steps=10, delay=0.1):
    """Print a start marker, one progress line per step, then a finish marker.

    Args:
        steps: Number of progress lines to print (default 10).
        delay: Seconds to sleep before each progress line (default 0.1).
    """
    print('T1 start')
    for i in range(steps):
        # The sleep stands in for slow work so that output from other
        # threads can interleave with this loop.
        time.sleep(delay)
        print(f'time is {i} **********')
    print('T1 finish')
# Run thread_job on a new thread named 'T1'. There is no join here, so the
# statements below run without waiting for T1.
added_thread = threading.Thread(target=thread_job, name='T1')
added_thread.start()
# The other task, run on the main thread: sleep 0.3 s, then report completion
time.sleep(0.3)
print('all done')
在加入join之后,完成顺序变成了T1,另一个任务
# Work executed on the worker thread
def thread_job(steps=10, delay=0.1):
    """Print a start marker, one progress line per step, then a finish marker.

    Args:
        steps: Number of progress lines to print (default 10).
        delay: Seconds to sleep before each progress line (default 0.1).
    """
    print('T1 start')
    for i in range(steps):
        # The sleep stands in for slow work.
        time.sleep(delay)
        print(f'time is {i} **********')
    print('T1 finish')
# Run thread_job on a new thread named 'T1'.
added_thread = threading.Thread(target=thread_job, name='T1')
added_thread.start()
# Block the main thread until T1 has finished before continuing.
added_thread.join()
# The other task, run on the main thread only after T1 is done
time.sleep(0.3)
print('all done')
在多个线程中,可以先把所有线程都start,使它们同时运行,再逐个join,等待所有线程都完成之后再开始之后的程序
如下程序,T1和T2是同时运行的,T1先执行完成,等待T2执行完之后再开始另一个任务
def thread_job(steps=5, delay=0.1):
    """Print a T1 start marker, one progress line per step, then a finish marker.

    The start/finish strings end in an explicit newline, so each is followed
    by a blank line in the output.

    Args:
        steps: Number of progress lines to print (default 5).
        delay: Seconds to sleep before each progress line (default 0.1).
    """
    print('T1 start\n')
    for i in range(steps):
        time.sleep(delay)
        print(f'time is {i}')
    print('T1 finish\n')
def T2_job(steps=5, delay=0.2):
    """Print a T2 start marker, one progress line per step, then a finish marker.

    Args:
        steps: Number of progress lines to print (default 5).
        delay: Seconds to sleep before each progress line (default 0.2).
    """
    print('T2 start\n')
    for i in range(steps):
        time.sleep(delay)
        # The trailing underscores distinguish T2's lines from T1's.
        print(f'time is {i}_____')
    print('T2 finish\n')
# Create both threads first, then start both, so T1 and T2 run at the same time.
added_thread = threading.Thread(target=thread_job, name='T1')
thread2 = threading.Thread(target=T2_job, name='T2')
added_thread.start()
thread2.start()
# Wait for both threads; 'all done' is printed only after both have finished.
thread2.join()
added_thread.join()
print('all done\n')
如果要保证顺序为,T1,T2,另一个任务,可以将T2线程创建写在T1的join之后
def thread_job(steps=5, delay=0.1):
    """Print a T1 start marker, one progress line per step, then a finish marker.

    Args:
        steps: Number of progress lines to print (default 5).
        delay: Seconds to sleep before each progress line (default 0.1).
    """
    print('T1 start\n')
    for i in range(steps):
        time.sleep(delay)
        print(f'time is {i}')
    print('T1 finish\n')
# Start T1 and wait for it to finish before anything below runs.
added_thread = threading.Thread(target=thread_job, name='T1')
added_thread.start()
added_thread.join()
def T2_job(steps=5, delay=0.2):
    """Print a T2 start marker, one progress line per step, then a finish marker.

    Args:
        steps: Number of progress lines to print (default 5).
        delay: Seconds to sleep before each progress line (default 0.2).
    """
    print('T2 start\n')
    for i in range(steps):
        time.sleep(delay)
        print(f'time is {i}_____')
    print('T2 finish\n')
# T2 is created and started at this point in the script, then joined, so
# 'all done' is printed only after T2 has finished.
thread2 = threading.Thread(target=T2_job, name='T2')
thread2.start()
thread2.join()
print('all done\n')
Queue
多线程中,任务函数的return返回值无法直接获取(Thread会丢弃target的返回值)
可以使用queue对输出元素进行存入和取出
from queue import Queue
def job(l, q):
    """Square every element of ``l`` in place, then put ``l`` on ``q``.

    Args:
        l: Mutable sequence of numbers; it is modified in place.
        q: Object with a ``put`` method (e.g. ``queue.Queue``) that receives
            the same list object once all elements have been squared.
    """
    for i, value in enumerate(l):
        # Assigning by index keeps the length unchanged, so this is safe
        # while iterating.
        l[i] = value ** 2
    q.put(l)  # hand the result back through the queue
q = Queue()   # queue that collects the result of every thread
threads = []  # keeps the Thread objects so they can be joined later
data = [[1, 2, 3], [3, 4, 5], [4, 4, 4], [5, 5, 5]]
# One thread per sub-list; iterating over data avoids a hard-coded count.
for item in data:
    t = threading.Thread(target=job, args=(item, q))
    t.start()
    threads.append(t)
for thread in threads:
    thread.join()  # wait until every thread has finished
print(q.qsize())  # one queued result per thread at this point
# Take the results out one by one. They come out in the order the threads
# called put(), which is not guaranteed to match the order of data.
results = [q.get() for _ in threads]
print(results)
print(q.qsize())  # everything has been taken out
GIL (全局解释器锁)
由于全局解释器锁(GIL)的存在,同一时刻只有一个线程在执行Python字节码,所以多线程的计算任务可能并不比普通的单线程程序快
所以多线程适用于IO密集的程序,不适合用于计算密集的程序
(此处原有一张图片 image.png,转存失败,无法显示)
import threading
from queue import Queue
import copy
import time
def job(l, q):
    """Put the sum of ``l`` on ``q``.

    Args:
        l: Iterable of numbers to add up.
        q: Object with a ``put`` method (e.g. ``queue.Queue``) that receives
            the sum.
    """
    q.put(sum(l))
def multithreading(l, n_threads=4):
    """Sum ``l`` once per thread on ``n_threads`` threads and print the grand total.

    Each thread runs ``job`` on its own shallow copy of ``l`` and puts its
    partial sum on a shared queue; the partial sums are then added up.

    Args:
        l: List of numbers to sum.
        n_threads: Number of threads to start (default 4).
    """
    q = Queue()
    threads = []
    for i in range(n_threads):
        t = threading.Thread(target=job, args=(copy.copy(l), q), name='T%i' % i)
        t.start()
        threads.append(t)
    # A plain loop rather than a list comprehension: join() is called only
    # for its side effect.
    for t in threads:
        t.join()
    # One partial sum was queued per thread.
    total = sum(q.get() for _ in threads)
    print(total)
def normal(l):
    """Print the sum of ``l``, computed on the calling thread."""
    print(sum(l))
if __name__ == '__main__':
    l = list(range(1000000))
    # perf_counter() is the clock intended for measuring short durations;
    # time.time() can jump if the system clock is adjusted.
    s_t = time.perf_counter()
    normal(l*4)  # same amount of summing as four threads each summing l
    print('normal: ',time.perf_counter()-s_t)
    s_t = time.perf_counter()
    multithreading(l)
    print('multithreading: ', time.perf_counter()-s_t)
多进程
在jupyter中多进程代码通常无法正常运行,建议保存为.py脚本后在pycharm或命令行中运行
多进程的创建和queue
import multiprocessing as mp
def job(q, n=1000):
    """Compute the sum of ``i + i**2 + i**3`` for ``i`` in ``range(n)`` and put it on ``q``.

    Args:
        q: Object with a ``put`` method (e.g. ``multiprocessing.Queue``)
            that receives the result.
        n: Exclusive upper bound of the summation range (default 1000).
    """
    res = 0
    for i in range(n):
        res += i+i**2+i**3
    q.put(res)  # a process target cannot return a value; use the queue
if __name__ == '__main__':
    q = mp.Queue()
    p1 = mp.Process(target=job, args=(q,))
    p2 = mp.Process(target=job, args=(q,))
    p1.start()
    p2.start()
    # Take the results off the queue BEFORE joining. A process that has put
    # items on a multiprocessing queue waits for them to be flushed to the
    # pipe before it terminates, so joining first can deadlock.
    res1 = q.get()
    res2 = q.get()
    p1.join()
    p2.join()
    print(res1+res2)
多进程,多线程比较
import multiprocessing as mp
import threading as td
import time
def job(q, n=1000000):
    """Compute the sum of ``i + i**2 + i**3`` for ``i`` in ``range(n)`` and put it on ``q``.

    Args:
        q: Object with a ``put`` method (thread or process queue) that
            receives the result.
        n: Exclusive upper bound of the summation range (default 1000000).
    """
    res = 0
    # Kept as an explicit loop so the per-iteration work matches normal().
    for i in range(n):
        res += i+i**2+i**3
    q.put(res)  # hand the result back through the queue
def multicore():
    """Run ``job`` in two processes and print the sum of their results."""
    q = mp.Queue()
    p1 = mp.Process(target=job, args=(q,))
    p2 = mp.Process(target=job, args=(q,))
    p1.start()
    p2.start()
    # Drain the queue before joining: a child that has put data on a
    # multiprocessing queue does not terminate until that data has been
    # flushed to the pipe, so join-before-get can deadlock.
    res1 = q.get()
    res2 = q.get()
    p1.join()
    p2.join()
    print('multicore:' , res1+res2)
def normal(n=1000000, repeats=2):
    """Do the same work as ``repeats`` calls of ``job`` on one thread and print the total.

    Args:
        n: Exclusive upper bound of the inner summation range (default 1000000).
        repeats: Number of times the inner summation is repeated (default 2,
            matching the two workers used by the other variants).
    """
    res = 0
    for _ in range(repeats):
        for i in range(n):
            res += i+i**2+i**3
    print('normal:', res)
def multithread():
    """Run ``job`` on two threads and print the sum of their results."""
    # Threads share one process, so a plain thread-safe queue.Queue is
    # enough; multiprocessing.Queue would send every item through a pipe.
    from queue import Queue

    q = Queue()
    t1 = td.Thread(target=job, args=(q,))
    t2 = td.Thread(target=job, args=(q,))
    t1.start()
    t2.start()
    t1.join()
    t2.join()
    res1 = q.get()
    res2 = q.get()
    print('multithread:', res1+res2)
if __name__ == '__main__':
    # perf_counter() is the clock intended for measuring elapsed time.
    st = time.perf_counter()
    normal()
    st1 = time.perf_counter()
    print('normal time:', st1 - st)
    multithread()
    st2 = time.perf_counter()
    print('multithread time:', st2 - st1)
    multicore()
    print('multicore time:', time.perf_counter()-st2)

# Sample output recorded in the original notes (timings are machine-dependent):
#   normal: 499999666667166666000000
#   normal time: 2.501274347305298
#   multithread: 499999666667166666000000
#   multithread time: 4.894949674606323
#   multicore: 499999666667166666000000
#   multicore time: 1.6351540088653564
pool 使用
指定任务后,使用Pool由系统自动把任务分配给各个进程
如计算 0-1000000 的 x+x**2+x**3
使用map(job,可迭代对象)完成多进程
import multiprocessing as mp
import time
def job_p(x):
    """Return ``x + x**2 + x**3``."""
    return x + x**2 + x**3
def nol(n=1000000):
    """Call ``job_p`` on every integer in ``range(n)`` in a plain loop.

    The results are discarded; the function only serves as the
    single-process baseline for timing.

    Args:
        n: Exclusive upper bound of the range (default 1000000).
    """
    for i in range(n):
        job_p(i)
def pool_():
    """Apply ``job_p`` to ``range(1000000)`` using a process pool.

    The results are discarded; the function only serves for timing.
    """
    # The with-block shuts the pool down on exit instead of leaving worker
    # processes around; map() blocks until all results are ready, so no
    # work is cut short.
    with mp.Pool() as pool:
        pool.map(job_p, range(1000000))
if __name__ == '__main__':
    # perf_counter() is the clock intended for measuring elapsed time.
    t1 = time.perf_counter()
    pool_()
    t2 = time.perf_counter()
    nol()
    t3 = time.perf_counter()
    print(t2-t1)  # elapsed seconds for the pool version
    print(t3-t2)  # elapsed seconds for the plain loop

# Sample output recorded in the original notes (timings are machine-dependent):
#   0.7160415649414062
#   1.7696497440338135