python多线程好还是多协程好_python单线程,多线程和协程速度对比

在某些应用场景下,想要提高python的并发能力,可以使用多线程,或者协程。比如网络爬虫,数据库操作等一些IO密集型的操作。下面对比python单线程,多线程和协程在网络爬虫场景下的速度。

一,单线程。

单线程代码

#!/usr/bin/env python
# coding:utf8
# Author: hz_oracle

import MySQLdb
import gevent
import requests
import time

class DbHandler(object):
    """Small wrapper around one MySQLdb connection used to read picture URLs."""

    def __init__(self, host, port, user, pwd, dbname):
        """Store the connection settings; no connection is opened here."""
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = dbname
        # Filled in by db_conn(). Starting at None lets db_close() be called
        # safely even when db_conn() failed or was never called.
        self.conn = None
        self.cursor = None

    def db_conn(self):
        """Open the connection and create a cursor.

        Returns:
            1 on success, 0 if connecting or creating the cursor failed.
        """
        try:
            self.conn = MySQLdb.connect(host=self.host, port=self.port, user=self.user,
                                        passwd=self.pwd, db=self.db, charset="utf8")
            self.cursor = self.conn.cursor()
            return 1
        except Exception as e:
            # The return code is kept for callers, but the reason is reported
            # instead of being dropped silently.
            print(u"数据库连接失败:%s" % e)
            return 0

    def get_urls(self, limitation):
        """Return the `pic` column of at most `limitation` rows of picurltable.

        Prints the number of URLs fetched.

        Args:
            limitation: maximum number of rows; anything int() accepts.

        Returns:
            A list of the first column of each row, or [] if the query
            failed (the error is printed).
        """
        sql = "select pic from picurltable limit %s"
        try:
            # Let the driver bind the value instead of formatting it into the
            # SQL text; int() keeps numeric strings working as before.
            self.cursor.execute(sql, (int(limitation),))
            urls_list = [line[0] for line in self.cursor.fetchall()]
            print(len(urls_list))
        except Exception as e:
            print(u"数据库查询失败:%s" % e)
            return []
        return urls_list

    def db_close(self):
        """Close the connection if one is open; otherwise do nothing."""
        if self.conn is not None:
            self.conn.close()
            self.conn = None
            self.cursor = None

44

def get_pic(url):
    """Download `url` and save the body as ./picture/<name>.jpg.

    <name> is the second-to-last '/'-separated component of the URL.
    The ./picture/ directory is not created here.

    Args:
        url: address of the picture to fetch.

    Returns:
        "ok" after the file has been written, "" if the download failed.
    """
    try:
        pic_obj = requests.get(url).content
    except Exception:
        print(u"图片出错")
        return ""

    filename = url.split('/')[-2]
    file_path = "./picture/" + filename + '.jpg'
    # open() instead of the Python-2-only file(); the with-block closes the
    # handle even when write() raises.
    with open(file_path, 'wb') as fp:
        fp.write(pic_obj)
    return "ok"

57

58

def main():
    """Download 100 pictures one after another and print the elapsed seconds."""
    start_time = time.time()
    db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
    connected = db_obj.db_conn()
    url_list = db_obj.get_urls(100)
    # The URLs are already in memory, so release the connection before the
    # downloads start (only if it was actually opened).
    if connected:
        db_obj.db_close()
    # Plain loop instead of map(): map() used only for side effects builds a
    # throwaway list on Python 2 and is lazy on Python 3, where it would
    # download nothing.
    for url in url_list:
        get_pic(url)
    end_time = time.time()
    costtime = end_time - start_time
    print(costtime)
    print("download END")


if __name__ == "__main__":
    main()

运行结果

100

45.1282339096

download END

单线程情况下,下载100张图片花了45秒。

再来看多线程的情况下。

#!/usr/bin/env python
# coding:utf8
# Author: hz_oracle

import MySQLdb
import gevent
import requests
import time
import threading
import Queue

# Not used anywhere in this listing; kept so the module-level names still exist.
lock1 = threading.RLock()

# Work queue: filled by DbHandler.get_urls(), drained by the MyThread workers.
url_queue = Queue.Queue()

# Not used anywhere in this listing; kept so the module-level names still exist.
urls_list = list()


class DbHandler(object):
    """Small wrapper around one MySQLdb connection that feeds `url_queue`."""

    def __init__(self, host, port, user, pwd, dbname):
        """Store the connection settings; no connection is opened here."""
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = dbname
        # Filled in by db_conn(). Starting at None lets db_close() be called
        # safely even when db_conn() failed or was never called.
        self.conn = None
        self.cursor = None

    def db_conn(self):
        """Open the connection and create a cursor.

        Returns:
            1 on success, 0 if connecting or creating the cursor failed.
        """
        try:
            self.conn = MySQLdb.connect(host=self.host, port=self.port, user=self.user,
                                        passwd=self.pwd, db=self.db, charset="utf8")
            self.cursor = self.conn.cursor()
            return 1
        except Exception as e:
            # The return code is kept for callers, but the reason is reported
            # instead of being dropped silently.
            print(u"数据库连接失败:%s" % e)
            return 0

    def get_urls(self, limitation):
        """Put the `pic` column of at most `limitation` rows onto `url_queue`.

        Args:
            limitation: maximum number of rows; anything int() accepts.

        Returns:
            1 if the query ran and its rows were queued, 0 on failure
            (the error is printed).
        """
        sql = "select pic from picurltable limit %s"
        try:
            # Let the driver bind the value instead of formatting it into the
            # SQL text; int() keeps numeric strings working as before.
            self.cursor.execute(sql, (int(limitation),))
            for line in self.cursor.fetchall():
                url_queue.put(line[0])
        except Exception as e:
            print(u"数据库查询失败:%s" % e)
            return 0
        return 1

    def db_close(self):
        """Close the connection if one is open; otherwise do nothing."""
        if self.conn is not None:
            self.conn.close()
            self.conn = None
            self.cursor = None


class MyThread(threading.Thread):
    """Worker thread that downloads exactly one URL taken from `url_queue`."""

    def __init__(self):
        super(MyThread, self).__init__()

    def run(self):
        """Take one URL off the queue and save it as ./picture/<name>.jpg.

        <name> is the second-to-last '/'-separated component of the URL.
        Does nothing when the queue is already empty.
        """
        try:
            # Non-blocking on purpose: a blocking get() never returns when
            # fewer URLs were queued than threads were started, which would
            # leave this thread, and the main thread waiting on it, stuck.
            url = url_queue.get_nowait()
        except Queue.Empty:
            return

        try:
            pic_obj = requests.get(url).content
        except Exception:
            print(u"图片出错")
            return

        filename = url.split('/')[-2]
        file_path = "./picture/" + filename + '.jpg'
        # open() instead of the Python-2-only file(); the with-block closes
        # the handle even when write() raises.
        with open(file_path, 'wb') as fp:
            fp.write(pic_obj)


def main():
    """Download up to 100 pictures with 100 threads and print the elapsed seconds."""
    start_time = time.time()
    db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
    connected = db_obj.db_conn()
    db_obj.get_urls(100)
    # The URLs are already queued, so release the connection before the
    # downloads start (only if it was actually opened).
    if connected:
        db_obj.db_close()

    workers = [MyThread() for _ in range(100)]
    for worker in workers:
        worker.start()
    # join() blocks until each worker is done. The previous loop polled
    # threading.active_count() without sleeping, which keeps the main thread
    # spinning for the whole run and miscounts if any other thread exists.
    for worker in workers:
        worker.join()

    end_time = time.time()
    costtime = end_time - start_time
    print(costtime)
    print("download END")


if __name__ == "__main__":
    main()

运行结果

15.408192873

download END

启用100个线程发现只要花15秒即可完成任务,100个线程可能不是最优的方案,但较单线程有很明显的提升。接着再来看协程。

协程代码

#!/usr/bin/env python
# coding:utf8
# Author: hz_oracle

# Monkey-patch first: gevent asks for patch_all() to run as early as possible,
# before the modules it patches (and libraries built on them, such as
# requests) have been imported.
from gevent import monkey
monkey.patch_all()

import MySQLdb
import requests
import time
import threading
import Queue
import gevent


class DbHandler(object):
    """Small wrapper around one MySQLdb connection used to read picture URLs."""

    def __init__(self, host, port, user, pwd, dbname):
        """Store the connection settings; no connection is opened here."""
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = dbname
        # Filled in by db_conn(). Starting at None lets db_close() be called
        # safely even when db_conn() failed or was never called.
        self.conn = None
        self.cursor = None

    def db_conn(self):
        """Open the connection and create a cursor.

        Returns:
            1 on success, 0 if connecting or creating the cursor failed.
        """
        try:
            self.conn = MySQLdb.connect(host=self.host, port=self.port, user=self.user,
                                        passwd=self.pwd, db=self.db, charset="utf8")
            self.cursor = self.conn.cursor()
            return 1
        except Exception as e:
            # The return code is kept for callers, but the reason is reported
            # instead of being dropped silently.
            print(u"数据库连接失败:%s" % e)
            return 0

    def get_urls(self, limitation):
        """Return the `pic` column of at most `limitation` rows of picurltable.

        Args:
            limitation: maximum number of rows; anything int() accepts.

        Returns:
            A list of the first column of each row, or [] if the query
            failed (the error is printed).
        """
        sql = "select pic from picurltable limit %s"
        try:
            # Let the driver bind the value instead of formatting it into the
            # SQL text; int() keeps numeric strings working as before.
            self.cursor.execute(sql, (int(limitation),))
            urls_list = [line[0] for line in self.cursor.fetchall()]
        except Exception as e:
            print(u"数据库查询失败:%s" % e)
            return []
        return urls_list

    def db_close(self):
        """Close the connection if one is open; otherwise do nothing."""
        if self.conn is not None:
            self.conn.close()
            self.conn = None
            self.cursor = None


def get_pic(url):
    """Download `url` and save the body as ./picture/<name>.jpg.

    <name> is the second-to-last '/'-separated component of the URL.
    The ./picture/ directory is not created here.

    Args:
        url: address of the picture to fetch.

    Returns:
        "ok" after the file has been written, "" if the download failed.
    """
    try:
        pic_obj = requests.get(url).content
    except Exception:
        print(u"图片出错")
        return ""

    filename = url.split('/')[-2]
    file_path = "./picture/" + filename + '.jpg'
    # open() instead of the Python-2-only file(); the with-block closes the
    # handle even when write() raises.
    with open(file_path, 'wb') as fp:
        fp.write(pic_obj)
    return "ok"

def main():
    """Download 100 pictures with one greenlet per URL and print the elapsed seconds."""
    start_time = time.time()
    db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
    connected = db_obj.db_conn()
    url_list = db_obj.get_urls(100)
    # The URLs are already in memory, so release the connection before the
    # downloads start (only if it was actually opened).
    if connected:
        db_obj.db_close()

    # Spawn every download, then block until all greenlets have finished.
    jobs = [gevent.spawn(get_pic, url) for url in url_list]
    gevent.joinall(jobs)

    end_time = time.time()
    costtime = end_time - start_time
    print(costtime)
    print("download END")


if __name__ == "__main__":
    main()

运行结果

10.6234440804

download END

使用协程发现只花了10秒多,也就是三种方法中最快的。

总结:

三种方法中,单线程最慢,多线程次之,而协程最快。 不过如果对多线程进行优化,也可能变快,这里不讨论。

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值