Python threadpool与multiprocessing

一、Threadpool,需要通过pip安装包 [sudo] pip install threadpool

旧的线程方法,建议使用multiprocessing

>>> pool = ThreadPool(poolsize)
>>> requests = makeRequests(some_callable, list_of_args, callback)
>>> [pool.putRequest(req) for req in requests]
>>> pool.wait()
二、multiprocessing:标准库自带,通过创建 Process 对象来启动子进程
1、pool类
from multiprocessing import Pool

def f(x):
    return x*x

if __name__ == '__main__':
    with Pool(5) as p:
        print(p.map(f, [1, 2, 3]))
2、process类  (start和join方法)

from multiprocessing import Process

def f(name):
    print('hello', name)

if __name__ == '__main__':
    p = Process(target=f, args=('bob',))
    p.start()
    p.join()

3、进程启动的三种方法

spawn:父进程启动一个全新的Python解释器进程,子进程只继承运行进程对象run()方法所必需的资源

fork:通过 os.fork() 产生子进程,子进程继承父进程的全部资源;对多线程的父进程使用 fork 是不安全的

forkserver:先启动一个服务器进程,之后每次需要新进程时都由该服务器进程 fork 产生;服务器进程是单线程的,因此使用 os.fork() 是安全的,子进程不会继承不必要的资源

 通过set_start_method()选择创建进程方法,最多只能用一次

通过get_context()获取context对象,一个程序中可以同时使用多种启动方法;context对象和 multiprocessing 模块具有相同的api:

import multiprocessing as mp

def foo(q):
    q.put('hello')

if __name__ == '__main__':
    mp.set_start_method('spawn')
    q = mp.Queue()
    p = mp.Process(target=foo, args=(q,))
    p.start()
    print(q.get())
    p.join()

if __name__ == '__main__':
    ctx = mp.get_context('spawn')
    q = ctx.Queue()
    p = ctx.Process(target=foo, args=(q,))
    p.start()
    print(q.get())
    p.join()

三、multiprocessing进程间通信

支持两种方法Queues和Pipes

Queue同queue.Queue,进程和线程安全
from multiprocessing import Process, Queue
def f(q):
    q.put([42, None, 'hello'])

if __name__ == '__main__':
    q = Queue()
    p = Process(target=f, args=(q,))
    p.start()
    print(q.get())    # prints "[42, None, 'hello']"
    p.join()
Pipe()返回一对相互连接的连接对象,默认是全双工(双向)的,每个对象都有 send() 和 recv() 方法;如果两个进程(或线程)同时对管道的同一端进行读或写,数据可能会损坏,同时使用不同的两端则没有问题
from multiprocessing import Process, Pipe

def f(conn):
    conn.send([42, None, 'hello'])
    conn.close()

if __name__ == '__main__':
    parent_conn, child_conn = Pipe()
    p = Process(target=f, args=(child_conn,))
    p.start()
    print(parent_conn.recv())   # prints "[42, None, 'hello']"
    p.join()

 

进程同步:multiprocessing 中有和 threading 对应的同步原语,比如可以用 Lock 保证同一时刻只有一个进程打印到标准输出
from multiprocessing import Process, Lock

def f(l, i):
    """Print 'hello world' followed by ``i`` while holding the lock ``l``.

    ``l`` is any lock object usable as a context manager; it is acquired
    before printing and released afterwards, even if ``print`` raises.
    """
    with l:
        print('hello world', i)

if __name__ == '__main__':
    lock = Lock()

    for num in range(10):
        Process(target=f, args=(lock, num)).start()

进程间共享状态(应尽量避免)

共享内存,通过Value or Array共享数据

from multiprocessing import Process, Value, Array

def f(n, a):
    """Set ``n.value`` to 3.1415927 and negate every element of ``a`` in place.

    ``n`` must expose a writable ``value`` attribute and ``a`` must be an
    indexable, mutable sequence.
    """
    n.value = 3.1415927
    # Write each negated element back to the same index so the caller's
    # (possibly shared) sequence is updated in place.
    for idx, item in enumerate(a):
        a[idx] = -item

if __name__ == '__main__':
    num = Value('d', 0.0)
    arr = Array('i', range(10))

    p = Process(target=f, args=(num, arr))
    p.start()
    p.join()

    print(num.value)
    print(arr[:])

服务器进程:Manager类的对象可以管理python类型的数据,其他进程可以通过代理访问其数据,manager对象支持如下类型

list, dict, Namespace, Lock, RLock, Semaphore, BoundedSemaphore, Condition, Event, Barrier, Queue, Value and Array

from multiprocessing import Process, Manager

def f(d, l):
    """Insert three sample entries into mapping ``d`` and reverse ``l`` in place."""
    # Keys of different types (int, str, float) are inserted in this order.
    samples = ((1, '1'), ('2', 2), (0.25, None))
    for key, value in samples:
        d[key] = value
    l.reverse()

if __name__ == '__main__':
    with Manager() as manager:
        d = manager.dict()
        l = manager.list(range(10))

        p = Process(target=f, args=(d, l))
        p.start()
        p.join()

        print(d)
        print(l)
工作进程池
from multiprocessing import Pool, TimeoutError
import time
import os

def f(x):
    return x*x

if __name__ == '__main__':
    # start 4 worker processes
    with Pool(processes=4) as pool:
        # print "[0, 1, 4,..., 81]"
        print(pool.map(f, range(10)))

除了map方法,还有imap_unordered()(结果按完成顺序返回,顺序不固定)和apply_async()(异步提交任务,立即返回结果对象)

from multiprocessing import Pool, TimeoutError
import time
import os

def f(x):
    return x*x

if __name__ == '__main__':
    # start 4 worker processes
    with Pool(processes=4) as pool:
        # print "[0, 1, 4,..., 81]"
        print(pool.map(f, range(10)))

        # The calls below must stay inside the ``with`` block: the pool is
        # no longer usable once the block exits.

        # print the same squares in arbitrary order
        for value in pool.imap_unordered(f, range(10)):
            print(value)

        # make a single worker sleep for 10 seconds; the submission returns
        # immediately and the result is only waited for in get()
        res = pool.apply_async(time.sleep, (10,))
        try:
            print(res.get(timeout=1))
        except TimeoutError:
            print("We lacked patience and got a multiprocessing.TimeoutError")

四、多进程和多线程区别

Python多线程不能利用CPU多核优势,IO密集型可用多线程,CPU密集型适合用多进程

定义需要并发处理的函数

import random
def Test(a, b):
    """Sleep for a random 5-20 seconds, then print ``a`` and ``b`` joined by '_'.

    Serves as the sample workload for the thread-pool and process-pool
    examples. Relies on the module-level ``time`` and ``random`` imports.
    """
    time.sleep(random.randint(5, 20))
    # The original line was missing its closing parenthesis (a syntax error).
    print(str(a) + '_' + str(b) + '\t')

线程池


  • import random
    import threadpool
    def MultiThreadTest():
        """Run ``Test`` 1000 times on a ``threadpool.ThreadPool`` of size 20."""
        pool = threadpool.ThreadPool(20)
        # Each work item is a 2-tuple whose second element carries the keyword
        # arguments for Test; the first element is None
        # (presumably the positional-argument slot -- confirm against threadpool docs).
        work_items = [(None, {'a': i, 'b': i + 10}) for i in range(1000)]
        for req in threadpool.makeRequests(Test, work_items):
            pool.putRequest(req)
        # Wait for the pool to finish the queued requests.
        pool.wait()
  • 进程池
  • import multiprocessing
    def MultiProcessTest():
        pool = multiprocessing.Pool(processes = 4)
        for i in range(1000):
            pool.apply_async(Test, (i, i + 10, ))
        pool.close()
        pool.join()
  • 共享数据

    多线程可以用Python的Queue共享数据,多进程要用multiprocessing.Queue。

  • import multiprocessing
    def Test(a, b, mpDict):
        """Print and record ``b`` in ``mpDict`` under the key ``str(a) + "test"``."""
        key = str(a) + "test"
        print(key, b)
        mpDict[key] = b
    def MultiProcessTest():
        pool = multiprocessing.Pool(processes=4)
        mpDict = multiprocessing.Manager().dict()
        for i in range(5):
            pool.apply_async(Test, (i, i + 10, mpDict, ))
        pool.close()
        pool.join()
        traditionDict = dict(mpDict)
        print(traditionDict)
  • 生产者-消费者 模型

  • 在 Pool 中共享 Queue:multiprocessing.Queue() 只能传给用 Process 创建的进程,不能作为参数传给 Pool 的工作进程;在 Pool 中需要使用 multiprocessing.Manager().Queue()

 

# 生产者
def write(q):
    """Producer: put 100 random 2x2 integer arrays (values 0-99) on ``q``.

    Logs each array before putting it and the queue size afterwards.
    """
    samples = np.random.randint(0, 100, (100, 2, 2))
    # Iterating the 3-D array yields one 2x2 slice per step along axis 0.
    for sample in samples:
        print('Put %s to queue...\n' % sample)
        q.put(sample)
        print(q.qsize())
# 消费者:
def read(q):
    """Consumer: print items taken from ``q`` until a ``None`` sentinel arrives.

    After each item the current queue size is printed and the worker sleeps
    for a random fraction of a second to simulate processing. Receiving
    ``None`` ends the loop so the worker can return and the pool can be
    joined; without that exit the loop would never terminate.
    """
    while True:
        # get(block=True, timeout=None): block controls whether an empty
        # queue makes the call wait or raise; with no timeout it waits
        # indefinitely.
        value = q.get(True)
        if value is None:
            # Sentinel: no more data will be produced.
            break
        print('Get %s from queue.\n' % value)
        print(q.qsize())
        time.sleep(random.random())


def test_pool():
    """Run one producer process and a pool of four consumers over a managed queue.

    A ``Manager().Queue`` is used because the queue is passed as an argument
    to pool workers. Once the producer has finished, one ``None`` sentinel
    per worker is queued so each ``read`` loop ends and ``pool.join()``
    can return.
    """
    manager = mp.Manager()
    # Capacity 2: the producer blocks whenever the consumers fall behind.
    q = manager.Queue(2)
    pw = Process(target=write, args=(q,))
    pw.start()
    worker_num = 4
    pool = mp.Pool(processes=worker_num)
    for i in range(worker_num):
        print('start data worker ' + str(i))
        pool.apply_async(read, (q, ))
    pool.close()
    pw.join()
    # The producer is done: tell every consumer to stop. Each worker exits
    # after reading one sentinel, so exactly worker_num are needed.
    for _ in range(worker_num):
        q.put(None)
    pool.join()
  1.  
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值