目前计算机程序一般会遇到两类I/O：硬盘I/O和网络I/O。我就针对网络I/O的场景，分析一下Python 3下进程、线程、协程的效率对比。进程采用multiprocessing.Pool进程池，线程采用自己封装的线程池，协程采用gevent库。用Python 3自带的urllib.request和开源的requests做对比。代码如下：
import urllib.request
import requests
import time
import multiprocessing
import threading
import queue
def startTimer():
    """Return the current wall-clock time to use as a timing start mark.

    Returns:
        float: The value of ``time.time()`` at the moment of the call.
    """
    started_at = time.time()
    return started_at
def ticT(startTime):
    """Return the seconds elapsed since ``startTime``.

    Args:
        startTime: A ``time.time()`` timestamp, e.g. the value returned by
            ``startTimer()``.

    Returns:
        float: ``time.time() - startTime`` rounded to 3 decimal places.
    """
    now = time.time()
    return round(now - startTime, 3)
#def tic(startTime, name):
# useTime = time.time() - startTime
# print('[%s] use time: %1.3f' % (name, useTime))
def download_urllib(url):
    """Fetch ``url`` with ``urllib.request`` and return its status and text.

    The request is sent with a ``user-agent`` header of ``Mozilla/5.0``.
    The body is decoded as GBK first; if that raises ``UnicodeDecodeError``
    it is decoded as UTF-8 with undecodable bytes dropped.

    Args:
        url: The address to request.

    Returns:
        tuple: ``(status, text)`` where ``status`` is the response's
        ``status`` attribute and ``text`` is the decoded body.

    Raises:
        Any exception raised by ``urllib.request.urlopen`` or by reading the
        response propagates unchanged; nothing is caught here except the
        GBK ``UnicodeDecodeError``.
    """
    req = urllib.request.Request(url, headers={'user-agent': 'Mozilla/5.0'})
    # Use the response as a context manager so the underlying connection is
    # closed even if read() raises; the original never closed it.
    with urllib.request.urlopen(req) as res:
        status = res.status
        raw = res.read()
    try:
        text = raw.decode('gbk')
    except UnicodeDecodeError:
        # Not valid GBK: fall back to UTF-8 and silently skip bad bytes.
        text = raw.decode('utf8', 'ignore')
    return status, text
def download_requests(url):
    """Fetch ``url`` with ``requests`` and return its status code and text.

    The request is sent with a ``user-agent`` header of ``Mozilla/5.0``.

    Args:
        url: The address to request.

    Returns:
        tuple: ``(status_code, text)`` taken from the response object.
    """
    # NOTE(review): no timeout is passed to requests.get, so a stalled server
    # may block this call; confirm whether that is intended for the benchmark.
    request_headers = {'user-agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=request_headers)
    return response.status_code, response.text
class threadPoolManager:
def __init__(self,urls, workNum=10000,threadNum=20):
self.workQueue=queue.Queue()
self.threadPool=[]
self.__initWorkQueue(urls)
self.__initThreadPool(threadNum)
def __initWorkQueue(self,urls):
for i in urls:
self.workQueue.put((download_requests,i))
def __initThreadPool(self,threadNum):
for i in range(threadNum):
self.threadPool.app