Python多进程编程
1 多进程编程
1.1 multiprocessing.Process
import multiprocessing
#多进程编程
import time
def get_html(n):
    """Simulate a slow task: sleep ``n`` seconds, print a notice, return ``n``.

    Args:
        n: Number of seconds to sleep.

    Returns:
        The same ``n`` that was passed in.
    """
    time.sleep(n)
    print("sub_progress success")
    return n
if __name__ == "__main__":
    progress = multiprocessing.Process(target=get_html, args=(2,))
    # Before start() the process has no pid yet, so this prints None.
    print(progress.pid)
    progress.start()
    # After start() the pid of the child process is available.
    print(progress.pid)
    # Block until the child has finished before announcing the end.
    progress.join()
    print("main progress end")
None
16868
sub_progress success
main progress end
1.2 multiprocessing.Pool
import multiprocessing
#多进程编程
import time
def get_html(n):
    """Simulate a slow task: sleep ``n`` seconds, print a notice, return ``n``.

    Args:
        n: Number of seconds to sleep.

    Returns:
        The same ``n`` that was passed in, so the pool can hand it back
        to the caller as the task result.
    """
    time.sleep(n)
    print("sub_progress success")
    return n
if __name__ == "__main__":
    # Use a process pool (not a thread pool) with one worker per CPU core.
    # The context manager releases the worker processes when the block exits.
    with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
        # Alternative: submit a single task and fetch its result.
        # result = pool.apply_async(get_html, args=(3,))
        #
        # # Wait for all tasks to finish.
        # pool.close()
        # pool.join()
        #
        # print(result.get())

        # imap yields results in input order: 5 is reported before 3 even
        # though the 3-second task finishes first.
        for result in pool.imap(get_html, [1, 5, 3]):
            print("{} sleep success".format(result))

        # Alternative: imap_unordered yields results as they become
        # available instead of in input order.
        # for result in pool.imap_unordered(get_html, [1, 5, 3]):
        #     print("{} sleep success".format(result))
sub_progress success
1 sleep success
sub_progress success
sub_progress success
5 sleep success
3 sleep success
1.3 ProcessPoolExecutor
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
#多进程编程
def random_sleep(n):
    """Sleep for ``n`` seconds and return ``n``.

    Args:
        n: Number of seconds to sleep.

    Returns:
        The same ``n`` that was passed in.
    """
    time.sleep(n)
    return n
if __name__ == "__main__":
    with ProcessPoolExecutor(3) as executor:
        # Start the clock before submitting: tasks may begin running as soon
        # as they are submitted. perf_counter is monotonic, so the elapsed
        # time is not affected by system clock adjustments.
        start_time = time.perf_counter()
        # Thirty 2-second sleeps spread over three worker processes.
        all_task = [executor.submit(random_sleep, num) for num in [2] * 30]
        # as_completed yields each future as soon as it finishes.
        for future in as_completed(all_task):
            data = future.result()
            print("exe result: {}".format(data))
        print("last time is: {}".format(time.perf_counter() - start_time))
exe result: 2
......
exe result: 2
last time is: 20.30926513671875
2 多线程与多进程比较
2.1 计算密集型操作
2.1.1 使用多线程编程
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
#耗cpu的操作,用多进程编程, 对于io操作来说, 使用多线程编程,进程切换代价要高于线程
#1. 对于耗费cpu的操作,多进程优于多线程
def fib(n):
    """Return the n-th Fibonacci number using plain double recursion.

    Used here as a CPU-bound workload. Any ``n <= 2`` returns 1.

    Args:
        n: Index of the Fibonacci number to compute.

    Returns:
        The Fibonacci number at index ``n`` (1, 1, 2, 3, 5, ...).
    """
    if n <= 2:
        return 1
    return fib(n - 1) + fib(n - 2)
if __name__ == "__main__":
    with ThreadPoolExecutor(3) as executor:
        # Start the clock before submitting: worker threads begin computing
        # as soon as a task is submitted, so timing from after the submit
        # loop would miss part of the work. perf_counter is monotonic.
        start_time = time.perf_counter()
        # fib(25) .. fib(39) computed by three threads.
        all_task = [executor.submit(fib, num) for num in range(25, 40)]
        # as_completed yields each future as soon as it finishes.
        for future in as_completed(all_task):
            data = future.result()
            print("exe result: {}".format(data))
        print("last time is: {}".format(time.perf_counter() - start_time))
last time is: 40.99148607254028
2.1.2 使用多进程编程
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
#耗cpu的操作,用多进程编程, 对于io操作来说, 使用多线程编程,进程切换代价要高于线程
#1. 对于耗费cpu的操作,多进程优于多线程
def fib(n):
    """Return the n-th Fibonacci number using plain double recursion.

    Used here as a CPU-bound workload. Any ``n <= 2`` returns 1.

    Args:
        n: Index of the Fibonacci number to compute.

    Returns:
        The Fibonacci number at index ``n`` (1, 1, 2, 3, 5, ...).
    """
    if n <= 2:
        return 1
    return fib(n - 1) + fib(n - 2)
if __name__ == "__main__":
    with ProcessPoolExecutor(3) as executor:
        # Start the clock before submitting so the measurement covers all of
        # the submitted work. perf_counter is monotonic.
        start_time = time.perf_counter()
        # fib(25) .. fib(39) computed by three worker processes.
        all_task = [executor.submit(fib, num) for num in range(25, 40)]
        # as_completed yields each future as soon as it finishes.
        for future in as_completed(all_task):
            data = future.result()
            print("exe result: {}".format(data))
        print("last time is: {}".format(time.perf_counter() - start_time))
last time is: 22.47068214416504
2.1.3 结果
相差将近2倍, 对于计算密集型操作,即耗费cpu的操作,多进程优于多线程
2.2 IO 密集型操作
2.2.1 使用多线程编程
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
#耗cpu的操作,用多进程编程, 对于io操作来说, 使用多线程编程,进程切换代价要高于线程
#2. 对于io操作来说,多线程优于多进程
def random_sleep(n):
    """Sleep for ``n`` seconds and return ``n``.

    Stands in for an IO-bound task: the worker only waits.

    Args:
        n: Number of seconds to sleep.

    Returns:
        The same ``n`` that was passed in.
    """
    time.sleep(n)
    return n
if __name__ == "__main__":
    with ThreadPoolExecutor(3) as executor:
        # Start the clock before submitting: threads begin sleeping as soon
        # as a task is submitted. perf_counter is monotonic.
        start_time = time.perf_counter()
        # Thirty 5-second sleeps spread over three threads.
        all_task = [executor.submit(random_sleep, num) for num in [5] * 30]
        # as_completed yields each future as soon as it finishes.
        for future in as_completed(all_task):
            data = future.result()
            print("exe result: {}".format(data))
        print("last time is: {}".format(time.perf_counter() - start_time))
last time is: 30.01819682121277
2.2.2 使用多进程编程
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
#耗cpu的操作,用多进程编程, 对于io操作来说, 使用多线程编程,进程切换代价要高于线程
#2. 对于io操作来说,多线程优于多进程
def random_sleep(n):
    """Sleep for ``n`` seconds and return ``n``.

    Stands in for an IO-bound task: the worker only waits.

    Args:
        n: Number of seconds to sleep.

    Returns:
        The same ``n`` that was passed in.
    """
    time.sleep(n)
    return n
if __name__ == "__main__":
    with ProcessPoolExecutor(3) as executor:
        # Start the clock before submitting so the measurement covers all of
        # the submitted work. perf_counter is monotonic.
        start_time = time.perf_counter()
        # Thirty 5-second sleeps spread over three worker processes.
        all_task = [executor.submit(random_sleep, num) for num in [5] * 30]
        # as_completed yields each future as soon as it finishes.
        for future in as_completed(all_task):
            data = future.result()
            print("exe result: {}".format(data))
        print("last time is: {}".format(time.perf_counter() - start_time))
last time is: 30.32193922996521
2.2.3 结果
两者耗时相差并不明显(多线程约 30.02 秒, 多进程约 30.32 秒), 但进程的切换代价高于线程, 而且线程之间更容易进行通信处理, 因此对于io操作来说,多线程优于多进程
3 进程间通信
3.1 queue通信
3.1.1 Process中queue通信
import time
from multiprocessing import Process, Queue, Pool, Manager, Pipe
def producer(queue):
    """Put the string "a" on ``queue``, then sleep for two seconds.

    Args:
        queue: Queue object shared with the consumer; only ``put`` is used.
    """
    queue.put("a")
    time.sleep(2)
def consumer(queue):
    """Sleep for two seconds, then take one item from ``queue`` and print it.

    Args:
        queue: Queue object shared with the producer; only ``get`` is used.
    """
    time.sleep(2)
    data = queue.get()
    print(data)
if __name__ == "__main__":
    # A multiprocessing.Queue (capacity 10) handed to both child processes.
    queue = Queue(10)
    my_producer = Process(target=producer, args=(queue,))
    my_consumer = Process(target=consumer, args=(queue,))
    my_producer.start()
    my_consumer.start()
    # Wait for both children before the main process exits.
    my_producer.join()
    my_consumer.join()
a
3.1.2 Pool中queue通信
import time
from multiprocessing import Process, Queue, Pool, Manager, Pipe
#multiprocessing中的queue不能用于pool进程池
#pool中的进程间通信需要使用manager中的queue
def producer(queue):
    """Put the string "a" on ``queue``, then sleep for two seconds.

    Args:
        queue: Queue object shared with the consumer; only ``put`` is used.
    """
    queue.put("a")
    time.sleep(2)
def consumer(queue):
    """Sleep for two seconds, then take one item from ``queue`` and print it.

    Args:
        queue: Queue object shared with the producer; only ``get`` is used.
    """
    time.sleep(2)
    data = queue.get()
    print(data)
if __name__ == "__main__":
    # Pool workers get their queue from a Manager rather than using
    # multiprocessing.Queue directly.
    queue = Manager().Queue(10)
    pool = Pool(2)
    # Keep the AsyncResult handles so that worker failures are not lost.
    producer_result = pool.apply_async(producer, args=(queue,))
    consumer_result = pool.apply_async(consumer, args=(queue,))
    # No more tasks will be submitted; wait for the two workers to finish.
    pool.close()
    pool.join()
    # get() re-raises any exception raised inside the worker; without it a
    # failing task would pass silently.
    producer_result.get()
    consumer_result.get()
a
3.2 共享全局变量(只适用于多线程)
import time
from multiprocessing import Process, Queue, Pool, Manager, Pipe
#共享全局变量通信
#共享全局变量不能适用于多进程编程,可以适用于多线程
def producer(a):
    """Add 100 to the local name ``a``, then sleep for two seconds.

    Nothing is returned. For an immutable value such as the int used in
    this example, the addition only rebinds this function's own local
    variable.

    Args:
        a: The value to add 100 to.
    """
    a += 100
    time.sleep(2)
def consumer(a):
    """Sleep for two seconds, then print ``a``.

    Args:
        a: The value to print.
    """
    time.sleep(2)
    print(a)
if __name__ == "__main__":
    a = 1
    # Both children receive the value 1 as an argument. The producer's
    # "+= 100" is not visible to the consumer, which still prints 1.
    my_producer = Process(target=producer, args=(a,))
    my_consumer = Process(target=consumer, args=(a,))
    my_producer.start()
    my_consumer.start()
    # Wait for both children before the main process exits.
    my_producer.join()
    my_consumer.join()
1
共享全局变量不能适用于多进程编程,可以适用于多线程
3.3 pipe通信
import time
from multiprocessing import Process, Queue, Pool, Manager, Pipe
def producer(pipe):
    """Send the string "pipe" through the connection ``pipe``.

    Args:
        pipe: Connection object to write to; only ``send`` is used.
    """
    pipe.send("pipe")
def consumer(pipe):
    """Receive one object from the connection ``pipe`` and print it.

    Args:
        pipe: Connection object to read from; only ``recv`` is used.
    """
    print(pipe.recv())
if __name__ == "__main__":
    # Pipe() returns the two connection objects that form the two ends of
    # the pipe.
    receive_pipe, send_pipe = Pipe()
    # A pipe has only two ends, so it can connect only two processes.
    my_producer = Process(target=producer, args=(send_pipe,))
    my_consumer = Process(target=consumer, args=(receive_pipe,))
    my_producer.start()
    my_consumer.start()
    # Wait for both children before the main process exits.
    my_producer.join()
    my_consumer.join()
pipe