说到并发,我们想到多线程和多进程。
那到底使用多进程还是多线程呢?这得看情况。我们的程序一般分为:
1)耗网络的(有很大一部分时间是在网络交互);
2)耗CPU的(计算密集,得充分利用多核)。
在第一种情况下,时间大部分被网络延时占用,所以使用多线程和多进程的效果都差不多。
在第二种情况下,时间的长短由CPU是否被充分利用决定。看《python第三方库系列之十二--多线程threading库》可知,由于CPython存在全局解释器锁(GIL),同一时刻只有一个线程能执行Python字节码,多线程无法真正并行利用多核;而多进程中每个进程都有自己的解释器和GIL,所以多进程更胜一筹,充分利用了CPU,节省了时间。
以下是一个多进程例子:
#coding=utf-8
import os
import traceback
import time
from multiprocessing import Process, Manager
# Total number of items to examine; the workers iterate over 1..CASE_COUNTS.
CASE_COUNTS = 100
# Number of worker processes the items are divided among.
PROCESS_COUNTS = 10
def assign_task_index(index, case_counts=None, process_counts=None):
    """Return the 1-based, inclusive item range handled by worker ``index``.

    The items ``1..case_counts`` are cut into consecutive chunks of
    ``ceil(case_counts / process_counts)`` items; worker ``index``
    (1-based) receives the ``index``-th chunk.

    Args:
        index: 1-based worker number.
        case_counts: total number of items; defaults to ``CASE_COUNTS``.
        process_counts: number of workers; defaults to ``PROCESS_COUNTS``.

    Returns:
        A ``(start_index, end_index)`` tuple, both inclusive. When the worker
        has nothing to do (more workers than chunks, or no items at all),
        ``end_index < start_index`` so ``range(start_index, end_index + 1)``
        is empty.

    Raises:
        ValueError: if ``process_counts`` is not positive.
    """
    if case_counts is None:
        case_counts = CASE_COUNTS
    if process_counts is None:
        process_counts = PROCESS_COUNTS
    if process_counts <= 0:
        raise ValueError("process_counts must be a positive integer")
    if case_counts <= 0:
        # No items at all: hand back an empty range instead of dividing by 0.
        return 1, 0

    # Ceiling division; ``//`` keeps this an integer on Python 2 and 3 alike.
    section_num = (case_counts + process_counts - 1) // process_counts

    start_index = section_num * (index - 1) + 1
    # Clamp to the last item: this trims the final (possibly short) chunk and
    # leaves surplus workers with an empty range rather than one that runs
    # past ``case_counts``.
    end_index = min(section_num * index, case_counts)
    return start_index, end_index
def do_single_process(index, q):
    """Worker body: push every even number of this worker's range onto ``q``.

    Args:
        index: 1-based worker number, passed to ``assign_task_index`` to
            obtain the inclusive range of numbers to examine.
        q: queue-like object with a ``put`` method that collects the even
            numbers found.

    A failure while queuing one number is reported with a traceback and does
    not stop the remaining numbers from being processed.
    """
    first, last = assign_task_index(index)
    for number in range(first, last + 1):
        if number % 2 != 0:
            # Odd numbers are not recorded.
            continue
        try:
            q.put(number)
        except Exception:
            print(traceback.format_exc())
def do_multi_process():
    """Run the even-number count across ``PROCESS_COUNTS`` worker processes.

    Creates one ``Process`` per worker (numbered from 1), each running
    ``do_single_process`` with a shared manager queue, starts them all,
    waits for them to finish, and prints the total, the even and odd counts
    and the elapsed wall-clock time in seconds.

    Any ordinary exception is reported with a traceback instead of being
    propagated. A failure to construct a worker terminates the program with
    exit status -1.
    """
    try:
        manager = Manager()
        # Shared between processes; workers record their results in it.
        result_queue = manager.Queue()

        workers = []
        for index in range(PROCESS_COUNTS):
            try:
                worker = Process(target=do_single_process,
                                 args=(index + 1, result_queue))
            except Exception:
                print(traceback.format_exc())
                # SystemExit (unlike os._exit) lets stdio be flushed and the
                # manager be shut down, so the traceback above is not lost.
                # It is not an Exception subclass, so the outer handler
                # below does not swallow it.
                raise SystemExit(-1)
            workers.append(worker)

        time_start = time.time()
        for worker in workers:
            worker.start()
        # Wait for every worker to finish.
        for worker in workers:
            worker.join()
        time_delta = time.time() - time_start

        # Only even numbers are queued, so the queue size is the even count.
        even_count = result_queue.qsize()
        print("总共个数:%s,偶数个数:%s,奇数个数:%s,耗时(秒):%s" %
              (CASE_COUNTS, even_count, CASE_COUNTS - even_count, time_delta))
    except Exception:
        print(traceback.format_exc())
# Script entry point: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    do_multi_process()