本文通过一个爬虫示例,对比多线程、多进程和协程这几种并发方式的执行效率。
假设我们现在要从网上下载图片,一个简单的方法是使用 requests + BeautifulSoup。(注:本文所有例子都使用 Python 3.5。)
单线程
示例 1:get_photos.py
import os
import time
import uuid
import requests
from bs4 import BeautifulSoup
def out_wrapper(func):
    """Simple decorator that prints how long each call to ``func`` takes.

    The returned wrapper forwards all positional and keyword arguments to
    ``func`` and hands back its return value, so it can decorate any
    callable, not only zero-argument functions whose result is ignored.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same name/docstring as ``func`` that prints
        ``'Used time <seconds>'`` after every successful call.
    """
    import functools  # local import: keeps this block self-contained

    @functools.wraps(func)  # preserve __name__/__doc__ of the wrapped function
    def inner_wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        stop_time = time.perf_counter()
        print('Used time {}'.format(stop_time - start_time))
        return result

    return inner_wrapper
def save_flag(img, filename):
    """Save image bytes to ``down_photos/<filename>``.

    The ``down_photos`` directory (relative to the current working
    directory) is created on demand, so the first call no longer fails
    with ``FileNotFoundError`` when the directory is missing.

    Args:
        img: Raw bytes of the image to write.
        filename: Name of the file to create inside ``down_photos``.
    """
    target_dir = 'down_photos'
    # exist_ok=True makes this a no-op when the directory is already there.
    os.makedirs(target_dir, exist_ok=True)
    path = os.path.join(target_dir, filename)
    with open(path, 'wb') as fp:
        fp.write(img)
def download_one(url, timeout=10):
    """Download a single image and store it under a random UUID file name.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Raises:
        requests.exceptions.RequestException: On connection errors,
            timeouts, or a 4xx/5xx response.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of saving an error page as an image.
    response.raise_for_status()
    save_flag(response.content, str(uuid.uuid4()))
def user_conf(limit=30):
    """Return up to ``limit`` image URLs scraped from the Unsplash home page.

    Fetches ``https://unsplash.com/``, parses it with the ``lxml`` parser
    and collects the ``src`` of ``<img>`` tags whose ``src`` ends with
    ``'80'``.
    NOTE(review): the ``'80'`` suffix filter is kept from the original;
    presumably it matches a quality parameter in the URL - confirm.

    Args:
        limit: Maximum number of URLs to return (default 30, the original
            hard-coded value).

    Returns:
        A list of at most ``limit`` URL strings, in document order.

    Raises:
        requests.exceptions.RequestException: If the page cannot be
            fetched or the server answers with a 4xx/5xx status.
    """
    url = 'https://unsplash.com/'
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")

    image_urls = []
    for tag in soup.find_all('img'):
        # Stop scanning as soon as enough URLs have been collected.
        if len(image_urls) >= limit:
            break
        src = tag.get("src")
        # Tag.get returns None for <img> tags without a src attribute;
        # calling .endswith on that would raise AttributeError.
        if src and src.endswith('80'):
            image_urls.append(src)
    return image_urls
@out_wrapper
def download_many():
zzr = user_conf()
for item in zzr: