python单线程,多线程和协程速度对比

(现在人工智能非常火爆,很多朋友都想学,但是一般的教程都是为博硕生准备的,太难看懂了。最近发现了一个非常适合小白入门的教程,不仅通俗易懂而且还很风趣幽默。所以忍不住分享一下给大家。

  点这里https://www.cbedai.net/ialexanderi可以跳转到教程。)

在某些应用场景下,想要提高python的并发能力,可以使用多线程,或者协程。比如网络爬虫,数据库操作等一些IO密集型的操作。下面对比python单线程,多线程和协程在网络爬虫场景下的速度。

一,单线程。

  单线程代码

复制代码

 1 #!/usr/bin/env python
 2 # coding:utf8 
 3 # Author: hz_oracle
 4 
 5 import MySQLdb
 6 import gevent
 7 import requests
 8 import time
 9 
10 
class DbHandler(object):
    """Small wrapper around a MySQLdb connection used to read picture URLs.

    The constructor only stores the settings; call ``db_conn()`` before
    ``get_urls()`` and ``db_close()`` when finished.
    """

    def __init__(self, host, port, user, pwd, dbname):
        """Store the connection settings; no connection is opened here."""
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = dbname
        # Defined up front so get_urls()/db_close() can recognise a handler
        # that was never connected instead of failing with AttributeError.
        self.conn = None
        self.cursor = None

    def db_conn(self):
        """Open the connection and create a cursor.

        Returns:
            1 on success, 0 on failure (the error is printed, not raised).
        """
        try:
            self.conn = MySQLdb.connect(
                host=self.host, port=self.port, user=self.user,
                passwd=self.pwd, db=self.db, charset="utf8")
            self.cursor = self.conn.cursor()
        except Exception as e:
            # Report the reason instead of failing silently.
            print(u"数据库连接失败:%s" % e)
            return 0
        return 1

    def get_urls(self, limitation):
        """Read at most ``limitation`` values of ``pic`` from ``picurltable``.

        Args:
            limitation: Maximum number of rows; coerced with ``int()``.

        Returns:
            A list holding the first column of every fetched row, or ``[]``
            when there is no cursor or the query fails. The number of URLs
            read is printed on success.
        """
        if self.cursor is None:
            print(u"数据库查询失败:%s" % u"db_conn() has not succeeded")
            return []
        sql = """select pic  from  picurltable limit  %s"""
        try:
            # Hand the limit to the driver as a parameter (and force it to an
            # int) rather than formatting it into the SQL text ourselves.
            self.cursor.execute(sql, (int(limitation),))
            urls_list = [line[0] for line in self.cursor.fetchall()]
        except Exception as e:
            print(u"数据库查询失败:%s" % e)
            return []
        print(len(urls_list))
        return urls_list

    def db_close(self):
        """Close the cursor and the connection; does nothing if not connected."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None
43 
44 
def get_pic(url, timeout=30):
    """Download ``url`` and save the body as ``./picture/<name>.jpg``.

    ``<name>`` is the second-to-last ``/``-separated component of ``url``.
    The ``./picture`` directory must already exist; it is not created here.

    Args:
        url: Address of the picture.
        timeout: Seconds passed to ``requests.get`` so that a stalled server
            cannot block the run indefinitely.

    Returns:
        "ok" once the file has been written, "" if the download failed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx answers as failures instead of saving an error page
        # under a .jpg name.
        response.raise_for_status()
        pic_obj = response.content
    except Exception:
        print(u"图片出错")
        return ""
    filename = url.split('/')[-2]
    file_path = "./picture/" + filename + '.jpg'
    # The with-block closes the file even when write() raises.
    with open(file_path, 'wb') as fp:
        fp.write(pic_obj)
    return "ok"
57 
58 
def main():
    """Download the pictures one after another and print the elapsed seconds."""
    start_time = time.time()
    db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
    # db_conn() reports failure through its return value (0), so check it
    # instead of carrying on without a connection.
    if not db_obj.db_conn():
        print(u"数据库连接失败")
        return
    try:
        url_list = db_obj.get_urls(100)
    finally:
        # The connection is only needed to read the URL list.
        db_obj.db_close()
    # Explicit loop rather than map(): map() is lazy on Python 3, so calling
    # it only for its side effects would download nothing there.
    for url in url_list:
        get_pic(url)
    costtime = time.time() - start_time
    print(costtime)
    print("download END")


if __name__ == "__main__":
    main()

复制代码

  运行结果

100
45.1282339096
download END

单线程情况下,下载100张图片花了45秒。

再来看多线程的情况。

复制代码

#!/usr/bin/env python
# coding:utf8
# Author: hz_oracle

import MySQLdb
import gevent
import requests
import time
import threading
import Queue

# Re-entrant lock; not referenced anywhere else in this listing.
lock1 = threading.RLock()
# Shared queue of picture URLs: DbHandler.get_urls() puts them in and
# MyThread.run() takes them out.
url_queue = Queue.Queue()
# Not referenced anywhere else in this listing.
urls_list = list()


class DbHandler(object):
    """Small wrapper around a MySQLdb connection that queues picture URLs.

    The constructor only stores the settings; call ``db_conn()`` before
    ``get_urls()`` and ``db_close()`` when finished.
    """

    def __init__(self, host, port, user, pwd, dbname):
        """Store the connection settings; no connection is opened here."""
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = dbname
        # Defined up front so get_urls()/db_close() can recognise a handler
        # that was never connected instead of failing with AttributeError.
        self.conn = None
        self.cursor = None

    def db_conn(self):
        """Open the connection and create a cursor.

        Returns:
            1 on success, 0 on failure (the error is printed, not raised).
        """
        try:
            self.conn = MySQLdb.connect(
                host=self.host, port=self.port, user=self.user,
                passwd=self.pwd, db=self.db, charset="utf8")
            self.cursor = self.conn.cursor()
        except Exception as e:
            # Report the reason instead of failing silently.
            print(u"数据库连接失败:%s" % e)
            return 0
        return 1

    def get_urls(self, limitation):
        """Put at most ``limitation`` values of ``pic`` into ``url_queue``.

        Args:
            limitation: Maximum number of rows; coerced with ``int()``.

        Returns:
            1 when the query ran and every fetched URL was queued, 0 when
            there is no cursor or the query fails.
        """
        if self.cursor is None:
            print(u"数据库查询失败:%s" % u"db_conn() has not succeeded")
            return 0
        sql = """select pic  from  picurltable limit  %s"""
        try:
            # Hand the limit to the driver as a parameter (and force it to an
            # int) rather than formatting it into the SQL text ourselves.
            self.cursor.execute(sql, (int(limitation),))
            for line in self.cursor.fetchall():
                url_queue.put(line[0])
        except Exception as e:
            print(u"数据库查询失败:%s" % e)
            return 0
        return 1

    def db_close(self):
        """Close the cursor and the connection; does nothing if not connected."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None


class MyThread(threading.Thread):
    """Worker thread that downloads the pictures whose URLs are in ``url_queue``.

    Files are written to ``./picture/<name>.jpg`` where ``<name>`` is the
    second-to-last ``/``-separated component of the URL. The ``./picture``
    directory must already exist; it is not created here.
    """

    def run(self):
        """Take URLs from ``url_queue`` until it is empty, saving each picture."""
        while True:
            try:
                # Non-blocking: a plain get() would leave this thread blocked
                # forever once the other workers have drained the queue.
                url = url_queue.get_nowait()
            except Queue.Empty:
                return
            try:
                # The timeout keeps one stalled server from pinning the thread.
                response = requests.get(url, timeout=30)
                # Treat 4xx/5xx answers as failures instead of saving an
                # error page under a .jpg name.
                response.raise_for_status()
                pic_obj = response.content
            except Exception:
                print(u"图片出错")
                continue
            filename = url.split('/')[-2]
            file_path = "./picture/" + filename + '.jpg'
            # The with-block closes the file even when write() raises.
            with open(file_path, 'wb') as fp:
                fp.write(pic_obj)


def main():
    """Download the queued pictures with one thread per URL and print the elapsed seconds."""
    start_time = time.time()
    db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
    # db_conn() reports failure through its return value (0), so check it
    # instead of carrying on without a connection.
    if not db_obj.db_conn():
        print(u"数据库连接失败")
        return
    try:
        db_obj.get_urls(100)
    finally:
        # The connection is only needed to fill url_queue.
        db_obj.db_close()
    # One thread per queued URL (at most 100, the query limit), so no thread
    # is left waiting on an empty queue when fewer rows come back.
    workers = [MyThread() for _ in range(url_queue.qsize())]
    for worker in workers:
        worker.start()
    # join() sleeps until each worker has finished; polling active_count() in
    # a tight loop burns CPU and competes with the workers for the GIL.
    for worker in workers:
        worker.join()
    costtime = time.time() - start_time
    print(costtime)
    print("download END")


if __name__ == "__main__":
    main()

复制代码

 

运行结果

15.408192873
download END

 

启用100个线程发现只要花15秒即可完成任务,100个线程可能不是最优的方案,但较单线程有很明显的提升。接着再来看协程。

 

 

协程代码

复制代码

#!/usr/bin/env python
# coding:utf8
# Author: hz_oracle

import MySQLdb
import requests
import time
import threading
import Queue

from gevent import monkey; monkey.patch_all()
import gevent


class DbHandler(object):
    """Small wrapper around a MySQLdb connection used to read picture URLs.

    The constructor only stores the settings; call ``db_conn()`` before
    ``get_urls()`` and ``db_close()`` when finished.
    """

    def __init__(self, host, port, user, pwd, dbname):
        """Store the connection settings; no connection is opened here."""
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = dbname
        # Defined up front so get_urls()/db_close() can recognise a handler
        # that was never connected instead of failing with AttributeError.
        self.conn = None
        self.cursor = None

    def db_conn(self):
        """Open the connection and create a cursor.

        Returns:
            1 on success, 0 on failure (the error is printed, not raised).
        """
        try:
            self.conn = MySQLdb.connect(
                host=self.host, port=self.port, user=self.user,
                passwd=self.pwd, db=self.db, charset="utf8")
            self.cursor = self.conn.cursor()
        except Exception as e:
            # Report the reason instead of failing silently.
            print(u"数据库连接失败:%s" % e)
            return 0
        return 1

    def get_urls(self, limitation):
        """Read at most ``limitation`` values of ``pic`` from ``picurltable``.

        Args:
            limitation: Maximum number of rows; coerced with ``int()``.

        Returns:
            A list holding the first column of every fetched row, or ``[]``
            when there is no cursor or the query fails.
        """
        if self.cursor is None:
            print(u"数据库查询失败:%s" % u"db_conn() has not succeeded")
            return []
        sql = """select pic  from  picurltable limit  %s"""
        try:
            # Hand the limit to the driver as a parameter (and force it to an
            # int) rather than formatting it into the SQL text ourselves.
            self.cursor.execute(sql, (int(limitation),))
            urls_list = [line[0] for line in self.cursor.fetchall()]
        except Exception as e:
            print(u"数据库查询失败:%s" % e)
            return []
        return urls_list

    def db_close(self):
        """Close the cursor and the connection; does nothing if not connected."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None


def get_pic(url, timeout=30):
    """Download ``url`` and save the body as ``./picture/<name>.jpg``.

    ``<name>`` is the second-to-last ``/``-separated component of ``url``.
    The ``./picture`` directory must already exist; it is not created here.

    Args:
        url: Address of the picture.
        timeout: Seconds passed to ``requests.get`` so that a stalled server
            cannot block the run indefinitely.

    Returns:
        "ok" once the file has been written, "" if the download failed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx answers as failures instead of saving an error page
        # under a .jpg name.
        response.raise_for_status()
        pic_obj = response.content
    except Exception:
        print(u"图片出错")
        return ""
    filename = url.split('/')[-2]
    file_path = "./picture/" + filename + '.jpg'
    # The with-block closes the file even when write() raises.
    with open(file_path, 'wb') as fp:
        fp.write(pic_obj)
    return "ok"


def main():
    """Download the pictures with one gevent greenlet per URL and print the elapsed seconds."""
    start_time = time.time()
    db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
    # db_conn() reports failure through its return value (0), so check it
    # instead of carrying on without a connection.
    if not db_obj.db_conn():
        print(u"数据库连接失败")
        return
    try:
        url_list = db_obj.get_urls(100)
    finally:
        # The connection is only needed to read the URL list.
        db_obj.db_close()
    # Spawn one greenlet per URL and wait until all of them have finished.
    jobs = [gevent.spawn(get_pic, url) for url in url_list]
    gevent.joinall(jobs)

    costtime = time.time() - start_time
    print(costtime)
    print("download END")


if __name__ == "__main__":
    main()

复制代码

 

运行结果

10.6234440804
download END

 

使用协程发现只花了10秒多,是三种方法中最快的。

 

总结:

        三种方法中,单线程最慢,多线程次之,而协程最快。 不过如果对多线程进行优化,也可能变快,这里不讨论。

©️2020 CSDN 皮肤主题: 编程工作室 设计师:CSDN官方博客 返回首页