https://morvanzhou.github.io/tutorials/python-basic/multiprocessing/1-why/
多进程(多核) Multiprocessing 和多线程 threading 类似, 他们都是在 python 中用来并行运算的
一、为什么需要 Multiprocessing:Python 有 Global Interpreter Lock (GIL),这个东西让 Python 一次只能执行一个线程
因此Python的多线程程序并不能利用多核CPU的优势
二、添加进程:与线程类似
import multiprocessing as mp


def job(a, b):
    """Print the two arguments followed by an extra newline.

    Runs as a child-process target; process targets cannot return
    values, so the result is simply printed.
    """
    # Bug fix: the original used '\m', which is not an escape sequence
    # and prints a literal backslash-m; a newline '\n' was intended.
    print(a, b, '\n')


def test():
    """Spawn one child process running job('c', 'd') and wait for it."""
    p1 = mp.Process(target=job, args=('c', 'd'))
    p1.start()
    p1.join()  # block until the child process finishes


if __name__ == '__main__':
    test()
三、存储进程输出 Queue:进程方法无返回值,放到queue中
import multiprocessing as mp


def job(q):
    """Compute sum(i + i**2 + i**3 for i in range(1000)) and put it on q.

    Process targets cannot return values directly, so the result is
    delivered to the parent through the multiprocessing queue *q*.
    """
    total = sum(i + i ** 2 + i ** 3 for i in range(1000))
    q.put(total)


def test():
    """Run job in two child processes and print the combined results."""
    q = mp.Queue()  # queue shared between parent and children
    # args must be a tuple, hence the trailing comma after q
    workers = [mp.Process(target=job, args=(q,)) for _ in range(2)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print(q.get() + q.get())


if __name__ == '__main__':
    test()
四、Multiprocessing和Threading的对比
可以运行下面例子看出来,多线程比什么都不用还要慢一点(对于纯运算,受 GIL 限制,用不到多核的优势)
当运算工作量很大时(累加10000000),多进程的优势就体现出来了,快了近一倍(可能是我双核)
但是如果工作量不大(可以把10000000改小试试),多进程反而慢,因为创建进程本身就有开销
import multiprocessing as mp
import threading as td
import time


def job(q, n=10000000):
    """Sum i + i**2 + i**3 over range(n) and put the result on q.

    *n* defaults to the original hard-coded workload size so existing
    callers are unchanged, but it can now be lowered to shrink the
    benchmark (or for testing).
    """
    res = 0
    for i in range(n):
        res += i + i ** 2 + i ** 3
    q.put(res)


def multcore():
    """Run the workload twice in parallel across two processes."""
    q = mp.Queue()
    # args must be an iterable (tuple), hence the trailing comma
    p1 = mp.Process(target=job, args=(q,))
    p2 = mp.Process(target=job, args=(q,))
    p1.start()
    p2.start()
    p1.join()
    p2.join()
    res1 = q.get()
    res2 = q.get()
    print('multcore:', res1 + res2)


def normal():
    """Run the workload twice sequentially in this process (baseline)."""
    res = 0
    for _ in range(2):
        for i in range(10000000):
            res += i + i ** 2 + i ** 3
    print('normal:', res)


def multithread():
    """Run the workload twice in two threads.

    Because of the GIL, pure-CPU work in threads gets no speedup over
    the sequential baseline.
    """
    q = mp.Queue()
    t1 = td.Thread(target=job, args=(q,))
    t2 = td.Thread(target=job, args=(q,))
    t1.start()
    t2.start()
    t1.join()
    t2.join()
    res1 = q.get()
    res2 = q.get()
    # label fixed: the original printed the misspelled 'multthread:'
    print('multithread:', res1 + res2)


if __name__ == '__main__':
    st = time.time()
    normal()
    st1 = time.time()
    print('normal time:', st1 - st)
    multithread()
    st2 = time.time()
    print('multithread time:', st2 - st1)
    multcore()
    st3 = time.time()
    # label fixed: the original printed 'mulcore time:' (missing a 't')
    print('multcore time:', st3 - st2)
运算结果
normal: 4999999666666716666660000000
normal time: 16.01147174835205
multthread: 4999999666666716666660000000
multithread time: 16.327133893966675
multcore: 4999999666666716666660000000
mulcore time: 9.506707191467285
五、进程池pool: 进程池就是我们将所要运行的东西,放到池子里,Python会自行解决多进程的问题
import multiprocessing as mp
import threading as td
import time


def job(x):
    """Return the square of x (pool workers, unlike Process, return values)."""
    return x * x


def multcore():
    """Demonstrate three ways of getting results out of a process pool."""
    # processes=2 tells the pool how many worker processes to spawn
    pool = mp.Pool(processes=2)
    # map() distributes the iterable across the workers and gathers the
    # return values automatically -- mp.Process cannot hand results back
    squares = pool.map(job, range(10000))
    print(squares)
    # apply_async() submits a single call; .get() fetches its result
    single = pool.apply_async(job, (2,))
    print(single.get())
    # submit many async calls via a comprehension, then collect each
    # result one by one
    pending = [pool.apply_async(job, (i,)) for i in range(10)]
    print([task.get() for task in pending])


if __name__ == '__main__':
    multcore()
六、共享内存:多线程有全局变量,多进程用cpu的共享内存
定义: value = mp.Value('d',1)
array = mp.Array('i',[1,3,6]) #只是一个一维数组
各参数代表的数据类型
| Type code | C Type | Python Type | Minimum size in bytes |
| --------- | ------------------ | ----------------- | --------------------- |
| `'b'` | signed char | int | 1 |
| `'B'` | unsigned char | int | 1 |
| `'u'` | Py_UNICODE | Unicode character | 2 |
| `'h'` | signed short | int | 2 |
| `'H'` | unsigned short | int | 2 |
| `'i'` | signed int | int | 2 |
| `'I'` | unsigned int | int | 2 |
| `'l'` | signed long | int | 4 |
| `'L'` | unsigned long | int | 4 |
| `'q'` | signed long long | int | 8 |
| `'Q'` | unsigned long long | int | 8 |
| `'f'` | float | float | 4 |
| `'d'` | double | float | 8 |
七、lock锁:v.value += num 不是原子操作,不加 lock 时两个进程可能交错读写共享值,输出顺序会混乱;这种竞争不一定每次运行都能复现,所以有时看起来"不加 lock 也没问题"
import multiprocessing as mp
import time


def job(v, num, l):
    """Add num to the shared value v five times while holding lock l.

    The lock is held for the whole loop so the two processes cannot
    interleave their read-modify-write updates of the shared memory.
    """
    l.acquire()  # take the lock before touching shared state
    remaining = 5
    while remaining:
        time.sleep(0.1)
        v.value += num  # update the shared-memory value
        print(v.value)
        remaining -= 1
    l.release()  # let the other process proceed


def multicore():
    """Start two processes that both increment the same shared counter."""
    l = mp.Lock()  # one lock shared by both processes
    v = mp.Value('i', 0)  # 'i' = signed int held in shared memory
    # the lock has to be passed to the children explicitly
    workers = [mp.Process(target=job, args=(v, step, l)) for step in (1, 3)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()


if __name__ == '__main__':
    multicore()