多进程
os模块中的fork 仅适用于Unix/Linux系统; multiprocessing模块跨平台.
- 使用fork, 调用一次,返回两次,第一次为父进程返回子进程的pid,第二次为子进程返回0,可由getpid获取当前进程pid, getppid获取父进程pid.
# Fork demo: os.fork() is Unix-only.  It returns twice -- the parent gets the
# child's pid, the child gets 0.  os.getpid()/os.getppid() identify the
# current and parent process.
import os

if __name__ == '__main__':
    print("current Process is {}".format(os.getpid()))
    pid = os.fork()
    if pid < 0:
        print("an error in fork")
    elif pid == 0:
        # Child branch: report which parent created us.
        print("Fpid {} creates me {}".format(os.getppid(), os.getpid()))
    else:
        # Parent branch: report the newly created child's pid.
        print("current pid {} creates a new pid {}".format(os.getpid(), pid))
- 使用Process类对象, p = Process(target=函数, args=(参数,)), p.start()开始执行, p.join()阻塞主进程直至子进程结束.
# def fun1(name): # print('测试%s多进程' %name) # # if __name__ == '__main__': # process_list = [] # for i in range(5): #开启5个子进程执行fun1函数 # p = Process(target=fun1,args=('Python',)) #实例化进程对象 # p.start() # process_list.append(p) # # for i in process_list: # i.join() # # print('结束测试')
- multiprocess的pool代表进程池对象 用于更多进程,若池中未满可加入新进程,已满则等待.
import os
import time
from multiprocessing import Pool


def task(name):
    """Worker body: report start, simulate 2s of work, report completion."""
    print("task {} pid: {} is running...".format(name, os.getpid()))
    time.sleep(2)
    print("task {} ends.".format(name))


if __name__ == '__main__':
    print("current pid {}".format(os.getpid()))
    # Pool of 3 workers for 5 tasks: two tasks wait until a slot frees up.
    p = Pool(processes=3)
    for i in range(5):
        p.apply_async(task, args=(i,))
    print("waiting subprocess done.")
    p.close()  # no further tasks may be submitted
    p.join()   # block until every pooled task has finished
    print("END")
- 进程间通信常使用Queue和Pipe方式; Queue用于多进程间通信: put操作插入数据, block为True时若队列已满则等待timeout, 超时抛出异常queue.Full; get操作获取数据, block为True时若队列为空则等待timeout, 超时抛出异常queue.Empty.
import os
import time
import random
from multiprocessing import Process, Queue, Pipe


def proc_send(q, stris):
    """Put each string from `stris` on the queue (block up to 4s when full)."""
    for s in stris:
        q.put(s, block=True, timeout=4)
        print("PID: {} send {} to Queue... time: {}".format(os.getpid(), s, time.time()))
        # time.sleep(random.random())


def proc_recv(q):
    """Drain the queue forever; q.get raises queue.Empty after 2s idle."""
    print("PID: {} is reading.. time: {}".format(os.getpid(), time.time()))
    while 1:
        s = q.get(block=True, timeout=2)
        print("PID: {} get {} from Queue... time: {}".format(os.getpid(), s, time.time()))


if __name__ == "__main__":
    q = Queue(maxsize=16)
    proc1 = Process(target=proc_send, args=(q, ["a", "b", "c", "d"],))
    proc2 = Process(target=proc_send, args=(q, ["1", "2", "3", "4"],))
    proc3 = Process(target=proc_recv, args=(q,))
    proc1.start()
    proc2.start()
    proc3.start()
    proc1.join()
    proc2.join()
    # The reader loops forever, so it is killed once both senders are done.
    proc3.terminate()


# Pipe demo
def send(p, strs):
    """Write each string into the pipe's send end, with a random pause."""
    for s in strs:
        p.send(s)
        print("PID: {} send {} to Pipe... time: {}".format(os.getpid(), s, time.time()))
        time.sleep(random.random())


def recv(p):
    """Read from the pipe's receive end forever."""
    print("Recving...")
    while True:
        s = p.recv()
        print("PID: {} get {} from Pipe... time: {}".format(os.getpid(), s, time.time()))


if __name__ == "__main__":
    p = Pipe(duplex=False)  # p[1] send, p[0] recv
    proc1 = Process(target=send, args=(p[1], ["test" + str(i) for i in range(10)],))
    proc2 = Process(target=recv, args=(p[0],))
    proc1.start()
    proc2.start()
    proc1.join()
    # NOTE(review): recv() never sees EOF (the parent keeps both pipe ends
    # open), so this join blocks forever -- original example behavior kept.
    proc2.join()
多线程
Python中的thread和threading(高级)模块
1. 创建多线程的两种方式:把函数传入并创建一个thread实例调用start执行或直接从threading.Thread继承并创建线程类,然后重写__init__和run方法.
#线程
def thread_run(vals):
    """Announce this thread, echo each value with a random pause, then report completion."""
    me = threading.current_thread().name
    print("thread {} is running...".format(me))
    for v in vals:
        print("thread {} : val {}".format(me, v))
        time.sleep(random.random())
    print("thread {} end...".format(me))
# Drive two worker threads through the same target function.
main_name = threading.current_thread().name
print("{} is running...".format(main_name))
workers = [
    threading.Thread(target=thread_run, name="thread_1", args=(['a', 'b', 'c'],)),
    threading.Thread(target=thread_run, name="thread_2", args=(['1', '2', '3', '4'],)),
]
for w in workers:
    w.start()
for w in workers:
    w.join()
print("{} end...".format(main_name))
class myThread(threading.Thread):
    """Thread subclass: prints each value in `vals` with a random pause between them."""

    def __init__(self, vals, name=None):
        super().__init__(name=name)
        self.vals = vals  # values echoed by run()

    def run(self):
        ident = threading.current_thread().name
        print("thread {} is running...".format(ident))
        print("copy: {}".format(self.name))
        for item in self.vals:
            print("thread {} : val {}".format(ident, item))
            time.sleep(random.random())
# Drive two instances of the Thread subclass.
print("{} is running...".format(threading.current_thread().name))
threads = [
    myThread(name="thread_1", vals=['a', 'b', 'c']),
    myThread(name="thread_2", vals=['1', '2', '3', '4']),
]
for t in threads:
    t.start()
for t in threads:
    t.join()
print("{} end...".format(threading.current_thread().name))
2.线程同步
Lock与RLock,对应操作acquire和release,前者只能一次acquire后release才能再次获取,后者增加了计数器可以多次acquire后依次release.
num = 0                     # shared counter the threads race to increment
mylock = threading.RLock()  # re-entrant lock guarding `num`


class myThread(threading.Thread):
    """Increment the shared `num` under `mylock` until it reaches 4, then stop."""

    def __init__(self):
        super().__init__()

    def run(self):
        global num
        while True:
            time.sleep(random.random())
            mylock.acquire()
            me = threading.current_thread().name
            print("{} locked number : {}".format(me, num))
            if num >= 4:
                # An RLock could be re-acquired here (its counter supports
                # nested acquire/release), which a plain Lock would not allow.
                mylock.release()
                print("{} released number : {}".format(me, num))
                break
            num += 1
            print("{} released number : {}".format(me, num))
            mylock.release()
# Race two incrementer threads against each other.
print("{} is running...".format(threading.current_thread().name))
racers = [myThread(), myThread()]
for r in racers:
    r.start()
for r in racers:
    r.join()
print("{} end...".format(threading.current_thread().name))
协程
略 使用gevent库
分布式进程
multiprocessing模块中managers子模块分布到多台机器;本质上是通过一个队列进行进程间的通讯,即将一个本地队列映射为网络队列.
#taskManager
import random, time
import multiprocessing
from multiprocessing import Queue
from multiprocessing.managers import BaseManager
# Queues that live in THIS process; the manager exports them over the network.
task_queue = Queue()
result_queue = Queue()


class QueueManger(BaseManager):
    """Custom manager used purely as a registration namespace."""
    pass


# Expose both local queues under network-callable names.
# NOTE(review): lambda callables + manager.start() is the classic Unix
# example; on Windows this pattern fails to pickle -- confirm target platform.
QueueManger.register('get_task_queue', callable=lambda: task_queue)
QueueManger.register('get_result_queue', callable=lambda: result_queue)

# Bind on all interfaces, port 8001; authkey must match the workers'.
manager = QueueManger(address=('', 8001), authkey=b'lrz')
manager.start()

task = manager.get_task_queue()
result = manager.get_result_queue()

# Publish ten fake URLs for the workers to "download".
for url in ['THISURL_' + str(i) for i in range(10)]:
    print("put task {}...".format(url))
    task.put(url)

print("try to get result...")
for _ in range(10):
    res = result.get(timeout=5)
    print("get result is {}....".format(res))

manager.shutdown()
#taskWorker
import random, time
import multiprocessing
from multiprocessing import Queue
from multiprocessing.managers import BaseManager
class QueueManger(BaseManager):
    """Client-side manager: registers names only, no local callables."""
    pass


# Register the same names the server exported; the client supplies no callables.
QueueManger.register('get_task_queue')
QueueManger.register('get_result_queue')

server_address = '127.0.0.1'
print("Connect to the sever {}...".format(server_address))

# authkey must match the server's.
manager = QueueManger(address=(server_address, 8001), authkey=b'lrz')
manager.connect()

task = manager.get_task_queue()
result = manager.get_result_queue()

# NOTE(review): empty()-then-get() is race-prone with several workers;
# kept as in the original example.
while not task.empty():
    url = task.get()
    print("run task download {}...".format(url))
    time.sleep(1)
    result.put("{} success ...".format(url))

print("worker exit.")
#forLinux