(现在人工智能非常火爆,很多朋友都想学,但是一般的教程都是为博硕生准备的,太难看懂了。最近发现了一个非常适合小白入门的教程,不仅通俗易懂而且还很风趣幽默。所以忍不住分享一下给大家。
点这里https://www.cbedai.net/ialexanderi可以跳转到教程。)
在某些应用场景下,想要提高python的并发能力,可以使用多线程,或者协程。比如网络爬虫,数据库操作等一些IO密集型的操作。下面对比python单线程,多线程和协程在网络爬虫场景下的速度。
一,单线程。
单线程代码
1 #!/usr/bin/env
2 # coding:utf8
3 # Author: hz_oracle
4
5 import MySQLdb
6 import gevent
7 import requests
8 import time
9
10
class DbHandler(object):
    """Small wrapper around a MySQLdb connection used to read picture URLs.

    The connection parameters are stored by ``__init__``; the connection
    itself is only opened by ``db_conn()``.
    """

    def __init__(self, host, port, user, pwd, dbname):
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = dbname
        # Defined up front so db_close() is safe even when db_conn() was
        # never called or failed.
        self.conn = None
        self.cursor = None

    def db_conn(self):
        """Open the connection and a cursor.

        Returns 1 on success and 0 on failure; the failure reason is printed
        instead of being discarded.
        """
        try:
            self.conn = MySQLdb.connect(
                host=self.host, port=self.port, user=self.user,
                passwd=self.pwd, db=self.db, charset="utf8")
            self.cursor = self.conn.cursor()
            return 1
        except Exception as e:
            print(u"数据库连接失败:%s" % e)
            return 0

    def get_urls(self, limitation):
        """Return up to ``limitation`` values of ``pic`` from ``picurltable``.

        Prints the number of URLs fetched. On any failure (including a
        missing connection) the error is printed and ``[]`` is returned.
        """
        sql = "select pic from picurltable limit %s"
        try:
            # The limit is coerced to int and passed as a query parameter
            # rather than being interpolated into the SQL text.
            self.cursor.execute(sql, (int(limitation),))
            urls_list = [row[0] for row in self.cursor.fetchall()]
        except Exception as e:
            print(u"数据库查询失败:%s" % e)
            return []
        print(len(urls_list))
        return urls_list

    def db_close(self):
        """Close the cursor and the connection; a no-op if nothing is open."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None
43
44
def get_pic(url):
    """Download ``url`` and save the body under ``./picture/``.

    The file name is the second-to-last ``/``-separated segment of the URL
    with ``.jpg`` appended.

    Returns "ok" after the file has been written, or "" when the HTTP
    request raised. Errors while writing the file are not caught here.
    """
    try:
        pic_obj = requests.get(url).content
    except Exception:
        print(u"图片出错")
        return ""
    filename = url.split('/')[-2]
    file_path = "./picture/" + filename + '.jpg'
    # ``with`` guarantees the handle is closed even if write() raises.
    with open(file_path, 'wb') as fp:
        fp.write(pic_obj)
    return "ok"
57
58
def main():
    """Download 100 pictures one after another and print the elapsed seconds."""
    start_time = time.time()
    db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
    # db_conn() returns 0 on failure; without a connection there is nothing
    # to download.
    if not db_obj.db_conn():
        print(u"无法连接数据库,任务终止")
        return
    try:
        url_list = db_obj.get_urls(100)
    finally:
        # The connection is only needed to read the URL list.
        db_obj.db_close()
    # An explicit loop instead of map(): the calls are made for their side
    # effects only.
    for url in url_list:
        get_pic(url)
    costtime = time.time() - start_time
    print(costtime)
    print("download END")


if __name__ == "__main__":
    main()
运行结果
100
45.1282339096
download END
单线程情况下,下载100张图片花了45秒。
再来看多线程的情况。
#!/usr/bin/env python
# coding:utf8
# Author: hz_oracle
import MySQLdb
import gevent
import requests
import time
import threading
import Queue
# Module-level state for the multi-threaded listing.
# NOTE(review): lock1 is not referenced anywhere in the visible listing;
# confirm it is unused before removing it.
lock1 = threading.RLock()
# Queue filled by DbHandler.get_urls() and consumed by MyThread.run().
url_queue = Queue.Queue()
# NOTE(review): urls_list is not referenced anywhere in the visible listing;
# confirm it is unused before removing it.
urls_list = list()
class DbHandler(object):
    """Small wrapper around a MySQLdb connection that feeds ``url_queue``.

    The connection parameters are stored by ``__init__``; the connection
    itself is only opened by ``db_conn()``.
    """

    def __init__(self, host, port, user, pwd, dbname):
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = dbname
        # Defined up front so db_close() is safe even when db_conn() was
        # never called or failed.
        self.conn = None
        self.cursor = None

    def db_conn(self):
        """Open the connection and a cursor.

        Returns 1 on success and 0 on failure; the failure reason is printed
        instead of being discarded.
        """
        try:
            self.conn = MySQLdb.connect(
                host=self.host, port=self.port, user=self.user,
                passwd=self.pwd, db=self.db, charset="utf8")
            self.cursor = self.conn.cursor()
            return 1
        except Exception as e:
            print(u"数据库连接失败:%s" % e)
            return 0

    def get_urls(self, limitation):
        """Put up to ``limitation`` values of ``pic`` onto ``url_queue``.

        Returns 1 on success. On any failure (including a missing
        connection) the error is printed and 0 is returned.
        """
        sql = "select pic from picurltable limit %s"
        try:
            # The limit is coerced to int and passed as a query parameter
            # rather than being interpolated into the SQL text.
            self.cursor.execute(sql, (int(limitation),))
            for row in self.cursor.fetchall():
                url_queue.put(row[0])
        except Exception as e:
            print(u"数据库查询失败:%s" % e)
            return 0
        return 1

    def db_close(self):
        """Close the cursor and the connection; a no-op if nothing is open."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None
class MyThread(threading.Thread):
    """Worker thread that downloads a single URL taken from ``url_queue``.

    The picture is saved under ``./picture/`` using the second-to-last
    ``/``-separated segment of the URL plus ``.jpg`` as the file name.
    """

    def run(self):
        try:
            # Non-blocking get: a blocking get() would leave this thread
            # (and anything waiting for it) stuck forever when the queue
            # holds fewer URLs than there are threads.
            url = url_queue.get_nowait()
        except Queue.Empty:
            return
        try:
            pic_obj = requests.get(url).content
        except Exception:
            print(u"图片出错")
            return
        filename = url.split('/')[-2]
        file_path = "./picture/" + filename + '.jpg'
        # ``with`` guarantees the handle is closed even if write() raises.
        with open(file_path, 'wb') as fp:
            fp.write(pic_obj)
def main():
    """Download the queued pictures with one thread per URL and print the
    elapsed seconds."""
    start_time = time.time()
    db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
    # db_conn() returns 0 on failure; without a connection there is nothing
    # to download.
    if not db_obj.db_conn():
        print(u"无法连接数据库,任务终止")
        return
    try:
        db_obj.get_urls(100)
    finally:
        # The connection is only needed to fill the queue.
        db_obj.db_close()
    # One worker per queued URL (at most 100, the limit requested above).
    # The queue is fully populated before any worker starts, so qsize() is
    # exact here and no worker is left waiting on an empty queue.
    workers = [MyThread() for _ in range(url_queue.qsize())]
    for worker in workers:
        worker.start()
    # join() sleeps until each worker finishes; polling active_count() in a
    # tight loop would burn CPU and compete with the download threads.
    for worker in workers:
        worker.join()
    costtime = time.time() - start_time
    print(costtime)
    print("download END")


if __name__ == "__main__":
    main()
运行结果
15.408192873
download END
启用100个线程发现只要花15秒即可完成任务,100个线程可能不是最优的方案,但较单线程有很明显的提升。接着再来看协程。
协程代码
#!/usr/bin/env python
# coding:utf8
# Author: hz_oracle
import MySQLdb
import requests
import time
import threading
import Queue
from gevent import monkey; monkey.patch_all()
import gevent
class DbHandler(object):
    """Small wrapper around a MySQLdb connection used to read picture URLs.

    The connection parameters are stored by ``__init__``; the connection
    itself is only opened by ``db_conn()``.
    """

    def __init__(self, host, port, user, pwd, dbname):
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = dbname
        # Defined up front so db_close() is safe even when db_conn() was
        # never called or failed.
        self.conn = None
        self.cursor = None

    def db_conn(self):
        """Open the connection and a cursor.

        Returns 1 on success and 0 on failure; the failure reason is printed
        instead of being discarded.
        """
        try:
            self.conn = MySQLdb.connect(
                host=self.host, port=self.port, user=self.user,
                passwd=self.pwd, db=self.db, charset="utf8")
            self.cursor = self.conn.cursor()
            return 1
        except Exception as e:
            print(u"数据库连接失败:%s" % e)
            return 0

    def get_urls(self, limitation):
        """Return up to ``limitation`` values of ``pic`` from ``picurltable``.

        On any failure (including a missing connection) the error is printed
        and ``[]`` is returned.
        """
        sql = "select pic from picurltable limit %s"
        try:
            # The limit is coerced to int and passed as a query parameter
            # rather than being interpolated into the SQL text.
            self.cursor.execute(sql, (int(limitation),))
            urls_list = [row[0] for row in self.cursor.fetchall()]
        except Exception as e:
            print(u"数据库查询失败:%s" % e)
            return []
        return urls_list

    def db_close(self):
        """Close the cursor and the connection; a no-op if nothing is open."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None
def get_pic(url):
    """Download ``url`` and save the body under ``./picture/``.

    The file name is the second-to-last ``/``-separated segment of the URL
    with ``.jpg`` appended.

    Returns "ok" after the file has been written, or "" when the HTTP
    request raised. Errors while writing the file are not caught here.
    """
    try:
        pic_obj = requests.get(url).content
    except Exception:
        print(u"图片出错")
        return ""
    filename = url.split('/')[-2]
    file_path = "./picture/" + filename + '.jpg'
    # ``with`` guarantees the handle is closed even if write() raises.
    with open(file_path, 'wb') as fp:
        fp.write(pic_obj)
    return "ok"
def main():
    """Download 100 pictures with one gevent greenlet per URL and print the
    elapsed seconds."""
    start_time = time.time()
    db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
    # db_conn() returns 0 on failure; without a connection there is nothing
    # to download.
    if not db_obj.db_conn():
        print(u"无法连接数据库,任务终止")
        return
    try:
        url_list = db_obj.get_urls(100)
    finally:
        # The connection is only needed to read the URL list.
        db_obj.db_close()
    # Spawn one greenlet per URL and wait until all of them have finished.
    jobs = [gevent.spawn(get_pic, url) for url in url_list]
    gevent.joinall(jobs)
    costtime = time.time() - start_time
    print(costtime)
    print("download END")


if __name__ == "__main__":
    main()
运行结果
10.6234440804
download END
使用协程只花了10秒多,是三种方法中最快的。