Python Crawler Basics: Multithreading
Example code
''' Processes and threads '''
''' Creating a child process with os.fork (Unix-only) '''
import os

if __name__ == '__main__':
    print('Current process (%s) start...' % os.getpid())
    # fork() returns 0 in the child and the child's pid in the parent
    pid = os.fork()
    if pid < 0:
        print('Error in fork')
    elif pid == 0:
        print('I am child process (%s) and my parent process is (%s).' % (os.getpid(), os.getppid()))
    else:
        print('I (%s) created a child process (%s).' % (os.getpid(), pid))
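Note that os.fork() only exists on Unix-like systems (Linux, macOS); on Windows the attribute is simply missing. A minimal guard, checking for the attribute before forking, might look like the sketch below (the fallback message is only illustrative):

import os

if __name__ == '__main__':
    if hasattr(os, 'fork'):
        pid = os.fork()
        if pid == 0:
            print('child %s, parent %s' % (os.getpid(), os.getppid()))
        else:
            print('parent %s forked child %s' % (os.getpid(), pid))
    else:
        # os.fork is unavailable (e.g. on Windows); use multiprocessing instead
        print('fork not supported on this platform, use multiprocessing')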
''' Creating multiple processes with the multiprocessing module '''
import os
from multiprocessing import Process

# Code executed by each child process
def run_proc(name):
    print('Child process %s (%s) Running...' % (name, os.getpid()))

if __name__ == '__main__':
    print('Parent process %s.' % os.getpid())
    processes = []
    for i in range(5):
        p = Process(target=run_proc, args=(str(i),))
        print('Process will start.')
        p.start()
        processes.append(p)
    # Wait for every child to finish before printing the final message
    for p in processes:
        p.join()
    print('Process end.')
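Besides passing a target function, multiprocessing also lets you subclass Process and override run(); start() then executes run() in the child. A minimal sketch (the class name MyProcess is just for illustration):

import os
from multiprocessing import Process

class MyProcess(Process):
    def __init__(self, task_name):
        super().__init__()
        self.task_name = task_name

    def run(self):
        # run() is the code that start() executes in the child process
        print('Child process %s (%s) running...' % (self.task_name, os.getpid()))

if __name__ == '__main__':
    workers = [MyProcess(str(i)) for i in range(3)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()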
''' The Pool class: a pool of worker processes '''
from multiprocessing import Pool
import os, time, random

def run_task(name):
    print('Task %s (pid = %s) is running...' % (name, os.getpid()))
    time.sleep(random.random() * 3)
    print('Task %s end.' % name)

if __name__ == '__main__':
    print('Current process %s.' % os.getpid())
    p = Pool(processes=3)
    for i in range(5):
        # apply_async submits the task without blocking the parent process
        p.apply_async(run_task, args=(i,))
    print('Waiting for all subprocesses done...')
    p.close()   # no more tasks can be submitted after close()
    p.join()    # wait for all worker processes to exit
    print('All subprocesses done.')
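apply_async() also returns an AsyncResult object, so the parent can collect return values instead of only waiting for the pool to drain. A small sketch, using a hypothetical square task just for illustration:

from multiprocessing import Pool

def square(x):
    return x * x

if __name__ == '__main__':
    with Pool(processes=3) as pool:
        results = [pool.apply_async(square, args=(i,)) for i in range(5)]
        # get() blocks until the corresponding task has finished
        print([r.get() for r in results])   # [0, 1, 4, 9, 16]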
''' Inter-process communication: Queue '''
from multiprocessing import Process, Queue
import os, time, random

# Writer process: put URLs into the shared queue
def proc_write(q, urls):
    print('Process(%s) is writing...' % os.getpid())
    for url in urls:
        q.put(url)
        print('Put %s to queue...' % url)
        time.sleep(random.random())

# Reader process: block on the queue and print whatever arrives
def proc_read(q):
    print('Process(%s) is reading...' % os.getpid())
    while True:
        url = q.get(True)
        print('Get %s from queue.' % url)

if __name__ == '__main__':
    q = Queue()
    proc_write1 = Process(target=proc_write, args=(q, ['url_1', 'url_2', 'url_3']))
    proc_write2 = Process(target=proc_write, args=(q, ['url_4', 'url_5', 'url_6']))
    proc_reader = Process(target=proc_read, args=(q,))
    proc_write1.start()
    proc_write2.start()
    proc_reader.start()
    # Wait for both writers to finish, then kill the reader (it loops forever)
    proc_write1.join()
    proc_write2.join()
    proc_reader.terminate()
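Because proc_read blocks on q.get() forever, the example has to terminate() it. An alternative is to push a sentinel value (None here, purely as an illustration) once the writers are done, so the reader exits on its own and join() returns normally:

from multiprocessing import Process, Queue

def reader(q):
    while True:
        url = q.get()
        if url is None:          # sentinel: no more data will arrive
            break
        print('Get %s from queue.' % url)

if __name__ == '__main__':
    q = Queue()
    r = Process(target=reader, args=(q,))
    r.start()
    for url in ['url_1', 'url_2', 'url_3']:
        q.put(url)
    q.put(None)                  # tell the reader to stop
    r.join()                     # returns instead of hanging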
''' Inter-process communication: Pipe '''
import multiprocessing
import os, time, random

# Sender: push each URL through one end of the pipe
def proc_send(pipe, urls):
    for url in urls:
        print('Process(%s) send: %s' % (os.getpid(), url))
        pipe.send(url)
        time.sleep(random.random())

# Receiver: block on recv() and print whatever arrives
def proc_recv(pipe):
    while True:
        print('Process(%s) recv: %s' % (os.getpid(), pipe.recv()))
        time.sleep(random.random())

if __name__ == '__main__':
    # Pipe() returns two connection objects, one for each end
    pipe = multiprocessing.Pipe()
    p1 = multiprocessing.Process(target=proc_send, args=(pipe[0], ['url_' + str(0)]))
    p2 = multiprocessing.Process(target=proc_recv, args=(pipe[1],))
    p1.start()
    p2.start()
    p1.join()
    # The receiver loops forever, so terminate it once the sender is done
    p2.terminate()
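All of the examples above use processes. Since the topic of this post is multithreading, here is a minimal thread-based sketch using the standard threading and queue modules (worker count and URL names are illustrative). For I/O-bound crawling, threads are usually sufficient; for CPU-bound work the GIL limits them, which is where the process-based approaches above help.

import threading
import queue

def crawl_worker(q):
    while True:
        url = q.get()
        if url is None:               # sentinel: stop this worker
            q.task_done()
            break
        print('Thread %s handling %s' % (threading.current_thread().name, url))
        q.task_done()

if __name__ == '__main__':
    q = queue.Queue()
    threads = [threading.Thread(target=crawl_worker, args=(q,)) for _ in range(3)]
    for t in threads:
        t.start()
    for url in ['url_%d' % i for i in range(6)]:
        q.put(url)
    for _ in threads:
        q.put(None)                   # one sentinel per worker thread
    q.join()                          # wait until every item has been processed
    for t in threads:
        t.join()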