重新研究了一下 python 协程, 在 python2 中用 gevent/grequests 实现, 并做了一些对比.
目的是想在 python2 的现有代码的基础上, 在改动量最小的前提下实现协程.
grequests 也是用 gevent 实现的, 对 requests 进行了一些补丁修改.
#! -*- coding:utf8
import gevent
import threading
import urllib2
from timeit import timeit
import requests
#import grequests # grequests 会进行 patch_all(thread=False, select=False), 测试多线程后再 import
from queue import Queue
from gevent import monkey
#monkey.patch_dns()
# Benchmark inputs: 100 URLs that differ only in the ``id`` query parameter.
url_list = [
    'https://github.com/gevent/gevent/blob/master/src/gevent/monkey.py?id=%s' % n
    for n in range(100)
]
thread_count = 20  # number of worker threads / greenlets used by the tests
test_count = 1  # ``number`` argument passed to timeit for every scenario
def get(url):
    """Download ``url`` once and discard the body; request failures are ignored.

    This is the unit of work executed by every benchmark scenario. The
    commented-out lines are alternative HTTP back-ends / debug output that
    can be swapped in by hand.

    Args:
        url: Address to fetch (10 second timeout).

    Returns:
        None.
    """
    try:
        # data = urllib2.urlopen(url).read()
        data = requests.get(url, timeout=10).content
        # data = grequests.map([grequests.get(url)])[0].content
        # print('%d bytes received from %s.' % (len(data), url))
    except Exception:
        # Best effort: one failed download must not abort the benchmark.
        # Catch Exception rather than BaseException so KeyboardInterrupt,
        # SystemExit and gevent's GreenletExit still propagate.
        pass
        # print('-1 bytes received from %s.' % url)
def cusmer(q):
    """Consumer loop: take URLs from ``q`` and fetch each one until it is empty.

    Items are taken with ``get_nowait()`` instead of checking ``empty()`` and
    then calling the blocking ``get()``. With several workers sharing one
    queue, another worker can take the last item between the check and the
    ``get()``, which would leave this worker blocked forever.

    Args:
        q: Queue of URL strings shared by the workers.

    Returns:
        None, once the queue has no more items.
    """
    try:
        from queue import Empty
    except ImportError:  # plain Python 2, where the module is named Queue
        from Queue import Empty

    while True:
        try:
            url = q.get_nowait()
        except Empty:
            return
        get(url)
class GT(gevent.Greenlet):
    """Greenlet subclass that drains a queue of URLs, fetching each one."""

    def __init__(self, q):
        """Store the queue to consume.

        Args:
            q: Queue of URL strings to fetch.
        """
        gevent.Greenlet.__init__(self)
        self.q = q

    def _run(self):
        """Fetch URLs from ``self.q`` until it has no more items.

        Uses ``get_nowait()`` rather than ``empty()`` followed by a blocking
        ``get()``, so the greenlet can never block on a queue that another
        consumer emptied in between.
        """
        try:
            from queue import Empty
        except ImportError:  # plain Python 2, where the module is named Queue
            from Queue import Empty

        while True:
            try:
                url = self.q.get_nowait()
            except Empty:
                return
            get(url)
def test():
    """Spawn one greenlet per URL and wait until all of them have finished."""
    jobs = [gevent.spawn(get, url) for url in url_list]
    gevent.joinall(jobs)
def test1():
    """Fetch every URL using ``thread_count`` threads that drain one queue."""
    pending = Queue()
    for url in url_list:
        pending.put(url)

    workers = [
        threading.Thread(target=cusmer, args=(pending,))
        for _ in range(thread_count)
    ]
    # Start every thread before joining any of them.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
def test2():
    """Fetch every URL through ``grequests.imap`` with ``size=thread_count``.

    grequests is imported inside the function on purpose: per the note next
    to the file's imports, importing it applies
    ``patch_all(thread=False, select=False)``, so it must not be loaded
    before the plain-thread scenario has been measured.
    """
    import grequests

    pending = [grequests.get(url, timeout=10) for url in url_list]
    for response in grequests.imap(pending, size=thread_count):
        try:
            len(response.content)
        except Exception:
            # Best effort: ignore a bad response but let KeyboardInterrupt,
            # SystemExit and GreenletExit propagate (they are not Exception
            # subclasses).
            pass
def test3():
    """Fetch every URL using ``thread_count`` spawned greenlets on one queue."""
    pending = Queue()
    for url in url_list:
        pending.put(url)

    workers = [gevent.spawn(cusmer, pending) for _ in range(thread_count)]
    gevent.joinall(workers)
def test4():
    """Fetch every URL using ``thread_count`` ``GT`` greenlets on one queue."""
    pending = Queue()
    for url in url_list:
        pending.put(url)

    workers = [GT(pending) for _ in range(thread_count)]
    # Start every greenlet before joining any of them.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
def _report(label, func):
    """Time ``func`` with timeit (``test_count`` runs) and print it after ``label``.

    The callable is passed to timeit directly, so no
    ``from __main__ import *`` setup string is needed.
    """
    elapsed = timeit(func, number=test_count)
    # Single-argument print(...) produces the same text on Python 2 and 3.
    print("%s %s" % (label, elapsed))


# Baseline: threads, measured before this script applies any monkey patch.
_report("test1 thread:", test1)

# Patch toggles: change which lines are active to measure another
# combination of patches against the same threaded test.
#monkey.patch_socket()
#monkey.patch_thread()
monkey.patch_select()
#monkey.patch_all()
_report("test1 thread(patch):", test1)

# The remaining scenarios run after everything has been patched.
monkey.patch_all()
# Every label ends with a colon so the output matches the recorded results.
_report("test spawn all(patch):", test)
_report("test2 grequests:", test2)
_report("test3 gevent.spawn:", test3)
_report("test4 gevent.Greenlet:", test4)
test1 thread: 9.75339603424
test1 thread(patch): 2.32443594933
test spawn all(patch): 2.04020094872
test2 grequests: 2.95118093491
test3 gevent.spawn: 2.9419798851
test4 gevent.Greenlet: 3.03724193573
测试线程的时候, 首次 9s 多, 打补丁之后, 2秒多.
主要的补丁包括 patch_socket, patch_thread, patch_select
一般多线程时间: 7-10s
patch_socket
: 7-10s
patch_thread
: 速度非常非常慢
patch_socket
+patch_thread
: 速度非常非常慢
patch_select
: 1.5-2s
patch_select
+patch_socket
: 1.5-2s
patch_select
+patch_thread
: 31s
patch_select
+patch_socket
+patch_thread
: 2-3s
patch_all
: 2-3s
从速度上来看, patch_select 是最快的, patch_thread 后花费时间稍微有些长.
总结: 在 python2 已有代码基础上实现协程, 最简单最有效最快速的办法: 使用 gevent.monkey
from gevent import monkey; monkey.patch_all();
# 或者 monkey.patch_select();