一、Threadpool,需要通过pip安装包 [sudo] pip install threadpool
旧的线程池方法,建议改用标准库 multiprocessing
>>> pool = ThreadPool(poolsize)
>>> requests = makeRequests(some_callable, list_of_args, callback)
>>> [pool.putRequest(req) for req in requests]
>>> pool.wait()
二、multiprocessing 标准库中有,通过创建 Process
对象
1、pool类
from multiprocessing import Pool


def f(x):
    """Return the square of *x* (work item for the pool)."""
    return x * x


if __name__ == '__main__':
    # Pool(5): five worker processes; map() distributes the inputs
    # across them and collects the results in order.
    with Pool(5) as p:
        print(p.map(f, [1, 2, 3]))
2、process类 (start和join方法)
from multiprocessing import Process


def f(name):
    """Greet *name*; runs inside the child process."""
    print('hello', name)


if __name__ == '__main__':
    # start() launches the child; join() blocks until it finishes.
    p = Process(target=f, args=('bob',))
    p.start()
    p.join()
3、进程启动的三种方法
spawn 父进程产出子进程,只会继承run()方法的部分资源
fork 通过 os.fork()产出子进程,全部继承,不安全
forkserver,单线程,安全,不全部继承资源
通过set_start_method()选择创建进程方法,最多只能用一次
通过 get_context() 可以在同一个程序中使用多种启动方法;context 对象和
multiprocessing 模块具有相同的 API:
import multiprocessing as mp


def foo(q):
    """Put a greeting on queue *q*; runs inside the child process."""
    q.put('hello')


if __name__ == '__main__':
    # set_start_method() chooses the start method globally;
    # it may be called at most once per program.
    mp.set_start_method('spawn')
    q = mp.Queue()
    p = mp.Process(target=foo, args=(q,))
    p.start()
    print(q.get())
    p.join()

if __name__ == '__main__':
    # get_context() returns a context object with the same API as the
    # multiprocessing module, so one program can mix start methods.
    ctx = mp.get_context('spawn')
    q = ctx.Queue()
    p = ctx.Process(target=foo, args=(q,))
    p.start()
    print(q.get())
    p.join()
三、multiprocessing进程间通信
支持两种方法Queues和Pipes
Queue同queue.Queue,进程和线程安全
from multiprocessing import Process, Queue


def f(q):
    """Put a sample payload on queue *q*; runs inside the child process."""
    q.put([42, None, 'hello'])


if __name__ == '__main__':
    q = Queue()
    p = Process(target=f, args=(q,))
    p.start()
    print(q.get())    # prints "[42, None, 'hello']"
    p.join()
Pipe() 返回一对相互连接的 Connection 对象,全双工,每个对象都有
# Each end of a Pipe() has send() and recv(); data may be corrupted if
# two processes read from / write to the same end at the same time.
from multiprocessing import Process, Pipe


def f(conn):
    """Send a sample payload through connection *conn*, then close it."""
    conn.send([42, None, 'hello'])
    conn.close()


if __name__ == '__main__':
    parent_conn, child_conn = Pipe()
    p = Process(target=f, args=(child_conn,))
    p.start()
    print(parent_conn.recv())  # prints "[42, None, 'hello']"
    p.join()
进程同步:multiprocessing 中有与 threading 对应的同步原语,比如可以用 Lock 保证多个进程的打印输出不互相交错
from multiprocessing import Process, Lock


def f(l, i):
    """Print a message while holding lock *l* so output lines don't interleave."""
    l.acquire()
    try:
        print('hello world', i)
    finally:
        # release in finally so the lock is freed even if print raises
        l.release()


if __name__ == '__main__':
    lock = Lock()
    for num in range(10):
        Process(target=f, args=(lock, num)).start()
进程间共享状态(应尽量避免)
from multiprocessing import Process, Value, Array


def f(n, a):
    """Overwrite shared value *n* with pi and negate every entry of *a*."""
    n.value = 3.1415927
    for i in range(len(a)):
        a[i] = -a[i]


if __name__ == '__main__':
    num = Value('d', 0.0)        # 'd' -> C double
    arr = Array('i', range(10))  # 'i' -> C signed int
    p = Process(target=f, args=(num, arr))
    p.start()
    p.join()
    print(num.value)
    print(arr[:])
服务器进程:Manager 类的对象可以管理 Python 类型的数据,其他进程可以通过代理访问其数据。Manager 对象支持如下类型:
list、dict、Namespace、Lock、RLock、Semaphore、BoundedSemaphore、Condition、Event、Barrier、Queue、Value 和 Array
from multiprocessing import Process, Manager


def f(d, l):
    """Mutate proxy dict *d* (mixed key types) and reverse proxy list *l*."""
    d[1] = '1'
    d['2'] = 2
    d[0.25] = None
    l.reverse()


if __name__ == '__main__':
    # Manager() starts a server process; d and l are proxies whose
    # mutations in the child are visible here.
    with Manager() as manager:
        d = manager.dict()
        l = manager.list(range(10))
        p = Process(target=f, args=(d, l))
        p.start()
        p.join()
        print(d)
        print(l)
# Worker-process pool.
from multiprocessing import Pool, TimeoutError
import time
import os


def f(x):
    """Return the square of *x* (work item for the pool)."""
    return x * x


if __name__ == '__main__':
    # start 4 worker processes
    with Pool(processes=4) as pool:
        # print "[0, 1, 4,..., 81]"
        print(pool.map(f, range(10)))
除了 map 方法,还有 imap_unordered() 无序返回结果,apply_async() 异步执行
from multiprocessing import Pool, TimeoutError
import time
import os


def f(x):
    """Return the square of *x* (work item for the pool)."""
    return x * x


if __name__ == '__main__':
    # start 4 worker processes
    with Pool(processes=4) as pool:
        # print "[0, 1, 4,..., 81]"
        print(pool.map(f, range(10)))
        # imap_unordered: lazy iterator; results arrive in arbitrary order
        pool.imap_unordered(f, range(10))
        # apply_async: schedule one call asynchronously;
        # res.get(timeout) would wait for (or time out on) the result
        res = pool.apply_async(time.sleep, (10,))
四、多进程和多线程区别
Python多线程不能利用CPU多核优势,IO密集型可用多线程,CPU密集型适合用多进程
# Function used by the concurrency examples below.
import random
import time  # Test() calls time.sleep(); the original snippet never imported it


def Test(a, b):
    """Sleep 5-20 seconds at random, then print "<a>_<b>" plus a trailing tab."""
    time.sleep(random.randint(5, 20))
    # the original snippet was missing the closing parenthesis here
    print(str(a) + '_' + str(b) + '\t')
线程池
-
import random
import threadpool  # third-party: [sudo] pip install threadpool


def MultiThreadTest():
    """Run Test() 1000 times on a 20-worker thread pool and wait for all."""
    pool = threadpool.ThreadPool(20)
    li = []
    for i in range(1000):
        # (args, kwargs) tuples consumed by makeRequests
        li.append((None, {'a': i, 'b': i + 10}))
    requests = threadpool.makeRequests(Test, li)
    [pool.putRequest(req) for req in requests]
    pool.wait()
- 进程池
-
import multiprocessing


def MultiProcessTest():
    """Run Test() 1000 times on a 4-worker process pool and wait for all."""
    pool = multiprocessing.Pool(processes = 4)
    for i in range(1000):
        pool.apply_async(Test, (i, i + 10, ))
    pool.close()  # no more tasks may be submitted
    pool.join()   # wait for every worker to finish
-
共享数据
多线程可以用Python的Queue共享数据,多进程要用multiprocessing.Queue。
-
import multiprocessing


def Test(a, b, mpDict):
    """Record *b* under key "<a>test" in the shared dict *mpDict*."""
    print(str(a) + "test", b)
    mpDict[str(a) + "test"] = b


def MultiProcessTest():
    """Fan Test() out over a 4-worker pool, collecting results in a
    Manager().dict() proxy — a plain dict is not shared across processes."""
    pool = multiprocessing.Pool(processes=4)
    mpDict = multiprocessing.Manager().dict()
    for i in range(5):
        pool.apply_async(Test, (i, i + 10, mpDict, ))
    pool.close()
    pool.join()
    # copy the proxy back into an ordinary dict before printing
    traditionDict = dict(mpDict)
    print(traditionDict)
-
生产者-消费者 模型
- Pool 共享 Queue 有个 坑,
multiprocessing.Queue()
只支持 Process 出来的进程,不支持 Pool 的,在 Pool 中需要使用multiprocessing.Manager()
# Producer
def write(q):
    """Generate 100 random 2x2 int arrays and put each on queue *q*."""
    a = np.random.randint(0, 100, (100, 2, 2))
    for value in range(a.shape[0]):
        print('Put %s to queue...\n' % a[value])
        q.put(a[value])
        print(q.qsize())
# Consumer: loop forever, pulling items off queue *q*.
def read(q):
    while True:
        # get(block=True, timeout=None):
        # block  -> wait when the queue is empty instead of raising
        # timeout -> raise after that many seconds; None waits forever
        value = q.get(True)
        print('Get %s from queue.\n' % value)
        print(q.qsize())
        time.sleep(random.random())
def test_pool():
    """Demo: one Process producer (write) feeding a Pool of consumers (read).

    Pool workers cannot share a plain multiprocessing.Queue(); a
    Manager().Queue() proxy must be used instead.

    NOTE(review): read() loops forever, so pool.join() below never
    returns — the workers must be stopped externally.
    """
    manager = mp.Manager()
    q = manager.Queue(2)  # maxsize 2: producer blocks until consumers drain
    pw = Process(target=write, args=(q,))
    pw.start()
    worker_num = 4
    pool = mp.Pool(processes=worker_num)
    for i in range(worker_num):
        print('start data worker ' + str(i))
        pool.apply_async(read, (q, ))
    pool.close()
    pw.join()
    pool.join()