Python 学习笔记 - 多进程和进程池-CSDN博客

前面学习了多线程，接下来学习多进程的创建和使用。多进程更适合计算密集型的操作，它的语法和多线程非常相像。唯一需要注意的是：多线程之间可以直接共享内存数据，而多进程默认每个进程都不能访问其他进程（程序）的内容。要在进程之间共享数据，可以通过一些特殊的数据结构（队列、数组和字典）来实现；注意这几个数据结构和平常使用的不太一样，是 multiprocessing 模块中专门为多进程定义的。

例如：通过queue来共享数据

 
     
          #!/usr/bin/env python
          # -*- coding:utf-8 -*-
          # Author:Alex Li
          from multiprocessing import Process
          from multiprocessing import queues
          import multiprocessing
          from multiprocessing import Array


          def foo(i, arg):
              """Put ``i`` on the queue ``arg`` and print the resulting queue size.

              Args:
                  i: Value to enqueue (the worker index in this example).
                  arg: Queue-like object providing ``put()`` and ``qsize()``.
              """
              arg.put(i)
              print('say hi', i, arg.qsize())


          if __name__ == "__main__":
              # A plain list (li = []) would not be shared between processes,
              # so a multiprocessing queue (capacity 20) carries the data.
              li = multiprocessing.Queue(20)
              for i in range(10):
                  p = Process(target=foo, args=(i, li))
                  p.start()
                  # Joining right after start() runs the children one at a time.
                  p.join()
         
 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
         
 
          say hi  
          0  
          1 
         
 
          say hi  
          1  
          2 
         
 
          say hi  
          2  
          3 
         
 
          say hi  
          3  
          4 
         
 
          say hi  
          4  
          5 
         
 
          say hi  
          5  
          6 
         
 
          say hi  
          6  
          7 
         
 
          say hi  
          7  
          8 
         
 
          say hi  
          8  
          9 
         
 
          say hi  
          9  
          10 
         
 
   

例2 通过array来共享数据，注意array初始化的时候就需要固定数据类型和长度

 
          from multiprocessing import Process
          from multiprocessing import queues
          import multiprocessing
          from multiprocessing import Array


          def foo(i, arg):
              """Store ``i + 100`` at index ``i`` of ``arg`` and print every element.

              Args:
                  i: Index to write to.
                  arg: Mutable, iterable sequence (a shared ``Array`` in this example).
              """
              arg[i] = i + 100
              for item in arg:
                  print(item)
              # Separator printed once per call, after the whole array was dumped.
              print('================')


          if __name__ == "__main__":
              # The element type code ('i') and the length (10) are fixed when the
              # shared array is created.
              li = Array('i', 10)
              for i in range(10):
                  # No join(): the children run concurrently, so their output
                  # may interleave in any order.
                  p = Process(target=foo, args=(i, li))
                  p.start()
         
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
         
          0 
         
          0 
         
          0 
         
          0 
         
          0 
         
          0 
         
          0 
         
          107 
         
          0 
         
          0 
         
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
         
          0 
         
          0 
         
          0 
         
          0 
         
          0 
         
          0 
         
          0 
         
          107 
         
          108 
         
          0 
         
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
         
          0 
         
          101 
         
          0 
         
          0 
         
          0 
         
          0 
         
          0 
         
          107 
         
          108 
         
          0 
         
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
         
          0 
         
          101 
         
          0 
         
          0 
         
          0 
         
          0 
         
          106 
         
          107 
         
          108 
         
          0 
         
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
         
          0 
         
          101 
         
          0 
         
          0 
         
          0 
         
          105 
         
          106 
         
          107 
         
          108 
         
          0 
         
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
          = 
         
          ...(等等省略）

例3 通过字典方式进程间共享

 
     
          #!/usr/bin/env python
          # -*- coding:utf-8 -*-
          # Author:Alex Li
          from multiprocessing import Process
          from multiprocessing import queues
          import multiprocessing
          from multiprocessing import Manager


          def foo(i, arg):
              """Store ``i + 100`` under key ``i`` in ``arg`` and print its values.

              Args:
                  i: Key to write.
                  arg: Mapping supporting item assignment and ``values()``
                      (a manager-backed dict in this example).
              """
              arg[i] = i + 100
              print(arg.values())


          if __name__ == "__main__":
              # Alternatives shown earlier in the article: a plain list
              # (li = [], not shared) or a multiprocessing queue.
              # The with-block shuts the manager process down when it exits.
              with Manager() as manager:
                  li = manager.dict()
                  for i in range(10):
                      p = Process(target=foo, args=(i, li))
                      p.start()
                      # Wait for each child before starting the next one.
                      p.join()
         
 
            
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
         
 
          [ 
          100 
          ] 
         
 
          [ 
          100 
          ,  
          101 
          ] 
         
 
          [ 
          100 
          ,  
          101 
          ,  
          102 
          ] 
         
 
          [ 
          100 
          ,  
          101 
          ,  
          102 
          ,  
          103 
          ] 
         
 
          [ 
          100 
          ,  
          101 
          ,  
          102 
          ,  
          103 
          ,  
          104 
          ] 
         
 
          [ 
          100 
          ,  
          101 
          ,  
          102 
          ,  
          103 
          ,  
          104 
          ,  
          105 
          ] 
         
 
          [ 
          100 
          ,  
          101 
          ,  
          102 
          ,  
          103 
          ,  
          104 
          ,  
          105 
          ,  
          106 
          ] 
         
 
          [ 
          100 
          ,  
          101 
          ,  
          102 
          ,  
          103 
          ,  
          104 
          ,  
          105 
          ,  
          106 
          ,  
          107 
          ] 
         
 
          [ 
          100 
          ,  
          101 
          ,  
          102 
          ,  
          103 
          ,  
          104 
          ,  
          105 
          ,  
          106 
          ,  
          107 
          ,  
          108 
          ] 
         
 
          [ 
          100 
          ,  
          101 
          ,  
          102 
          ,  
          103 
          ,  
          104 
          ,  
          105 
          ,  
          106 
          ,  
          107 
          ,  
          108 
          ,  
          109 
          ] 
         
 
   

和线程类似，当多个进程同时操作同一份共享数据的时候，需要加锁，不然结果可能出错；

比如

 
     
          #!/usr/bin/env python
          # -*- coding:utf-8 -*-
          # Author:Alex Li
          from multiprocessing import Process
          from multiprocessing import queues
          from multiprocessing import Array
          from multiprocessing import RLock, Lock, Event, Condition, Semaphore
          import multiprocessing
          import time


          def foo(i, lis, delay=1):
              """Decrement ``lis[0]``, sleep, then print the current counter value.

              No lock is taken here on purpose: the article uses this script to show
              what happens when several processes update shared data unguarded.

              Args:
                  i: Worker index (unused by the body).
                  lis: Mutable sequence whose first element is the shared counter.
                  delay: Seconds to sleep between the update and the print.
              """
              lis[0] -= 1
              time.sleep(delay)
              # By the time this runs, other workers may already have decremented
              # the counter as well.
              print('say hi', lis[0])


          if __name__ == "__main__":
              # One-element shared array holding the counter, starting at 10.
              li = Array('i', 1)
              li[0] = 10
              for i in range(10):
                  p = Process(target=foo, args=(i, li))
                  p.start()
         
 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
         
 
          say hi  
          0 
         
 
          say hi  
          0 
         
 
          say hi  
          0 
         
 
          say hi  
          0 
         
 
          say hi  
          0 
         
 
          say hi  
          0 
         
 
          say hi  
          0 
         
 
          say hi  
          0 
         
 
          say hi  
          0 
         
 
          say hi  
          0 
         
 
   

如何修复？

有两种方式：一种是在p.start()下面加上p.join()，那就真的是按顺序一个个执行了；另一种方式就是加锁。

 
          #!/usr/bin/env python
          # -*- coding:utf-8 -*-
          # Author:Alex Li
          from multiprocessing import Process
          from multiprocessing import queues
          from multiprocessing import Array
          from multiprocessing import RLock, Lock, Event, Condition, Semaphore
          import multiprocessing
          import time


          def foo(i, lis, lc, delay=1):
              """Decrement ``lis[0]`` and print it while holding the lock ``lc``.

              Args:
                  i: Worker index (unused by the body).
                  lis: Mutable sequence whose first element is the shared counter.
                  lc: Lock usable as a context manager; held for the whole
                      update-sleep-print sequence.
                  delay: Seconds to sleep between the update and the print.
              """
              # "with" releases the lock even if the body raises, unlike a bare
              # acquire()/release() pair.
              with lc:
                  lis[0] -= 1
                  time.sleep(delay)
                  print('say hi', lis[0])


          if __name__ == "__main__":
              # One-element shared array holding the counter, starting at 10.
              li = Array('i', 1)
              li[0] = 10
              lock = RLock()
              for i in range(10):
                  p = Process(target=foo, args=(i, li, lock))
                  p.start()
         
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
         
          say hi  
          9 
         
          say hi  
          8 
         
          say hi  
          7 
         
          say hi  
          6 
         
          say hi  
          5 
         
          say hi  
          4 
         
          say hi  
          3 
         
          say hi  
          2 
         
          say hi  
          1 
         
          say hi  
          0

与线程池不同，Python已经提供了完备的进程池模块（multiprocessing.Pool），因此可以直接使用。向进程池提交任务有2种方法：apply和apply_async；前者是阻塞的，而后者是非阻塞的。

例如下面的例子使用的是apply_async，所有任务是（非阻塞地）提交给进程池同时执行的。每个子进程执行到time.sleep(5)时会卡5秒，而主进程只等待2秒就执行到了pool.terminate()，这个时候进程池中的子进程就被直接终止了，所以只能看到前5个任务打印的'b'。

 
          #!/usr/bin/env python
          # -*- coding:utf-8 -*-
          from multiprocessing import Pool
          import time


          def f1(arg, delay=5):
              """Print ``arg`` with a 'b' marker, sleep, then print it with 'a'.

              Args:
                  arg: Value identifying the task in the output.
                  delay: Seconds to sleep between the two prints.
              """
              print(arg, 'b')
              time.sleep(delay)
              print(arg, 'a')


          if __name__ == "__main__":
              pool = Pool(5)
              for i in range(30):
                  # pool.apply(func=f1, args=(i,)) would block and run the tasks
                  # one after another; apply_async submits them without waiting.
                  pool.apply_async(func=f1, args=(i,))
              # Using pool.close() here instead would let all tasks finish.
              time.sleep(2)
              # Stop the workers immediately, while they are still sleeping.
              pool.terminate()
              pool.join()
         
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
         
          "C:\Program Files\Python3\python.exe"  
          C: 
          / 
          temp 
          / 
          s13day11 
          / 
          day11 
          / 
          s16.py 
         
          0  
          b 
         
          1  
          b 
         
          2  
          b 
         
          3  
          b 
         
          4  
          b

如果改成close()，进程池不再接收新任务，随后的pool.join()会等待pool中的任务全部执行完成之后再结束程序

 
          from multiprocessing import Pool
          import time


          def f1(arg, delay=5):
              """Print ``arg`` with a 'b' marker, sleep, then print it with 'a'.

              Args:
                  arg: Value identifying the task in the output.
                  delay: Seconds to sleep between the two prints.
              """
              print(arg, 'b')
              time.sleep(delay)
              print(arg, 'a')


          if __name__ == "__main__":
              pool = Pool(5)
              for i in range(30):
                  # pool.apply(func=f1, args=(i,)) would block and run the tasks
                  # one after another; apply_async submits them without waiting.
                  pool.apply_async(func=f1, args=(i,))
              # No more tasks will be submitted; already queued tasks still run.
              pool.close()
              time.sleep(2)
              # Using pool.terminate() here instead would stop the workers at once.
              # join() waits until every queued task has finished.
              pool.join()
         
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
          - 
         
          "C:\Program Files\Python3\python.exe"  
          C: 
          / 
          temp 
          / 
          s13day11 
          / 
          day11 
          / 
          s16.py 
         
          0  
          b 
         
          1  
          b 
         
          2  
          b 
         
          3  
          b 
         
          4  
          b 
         
          0  
          a 
         
          5  
          b 
         
          1  
          a 
         
          6  
          b 
         
          2  
          a 
         
          7  
          b 
         
          3  
          a 
         
          8  
          b 
         
          4  
          a 
         
          9  
          b 
         
          5  
          a 
         
          10  
          b 
         
          6  
          a 
         
          11  
          b 
         
          7  
          a 
         
          8  
          a 
         
          12  
          b 
         
          13  
          b 
         
          9  
          a 
         
          14  
          b 
         
          10  
          a 
         
          15  
          b 
         
          11  
          a 
         
          16  
          b 
         
          13  
          a 
         
          12  
          a 
         
          18  
          b 
         
          17  
          b 
         
          14  
          a 
         
          19  
          b 
         
          15  
          a 
         
          20  
          b 
         
          16  
          a 
         
          21  
          b 
         
          17  
          a 
         
          18  
          a 
         
          22  
          b 
         
          23  
          b 
         
          19  
          a 
         
          24  
          b 
         
          20  
          a 
         
          25  
          b 
         
          21  
          a 
         
          26  
          b 
         
          22  
          a 
         
          27  
          b 
         
          23  
          a 
         
          28  
          b 
         
          24  
          a 
         
          29  
          b 
         
          25  
          a 
         
          26  
          a 
         
          27  
          a 
         
          28  
          a 
         
          29  
          a