1. 线程
1.1 单线程
"""
Case: multithreading test.
Idea: run the same sleeping tasks single-threaded and multi-threaded,
then compare the total wall-clock time of each run.
"""
# --- Single-threaded version ---
import time
import threading

# Sleep durations (seconds) for each task.
loops = [4, 2]


def loop(nloop, nsec):
    """Sleep for nsec seconds, printing start/end timestamps for task nloop."""
    print('开始:nloop: %s,time:%s' % (nloop, time.ctime()))
    time.sleep(nsec)
    print('结束:nloop: %s,time:%s' % (nloop, time.ctime()))


if __name__ == '__main__':
    start_time = time.time()
    print('loop 开始运行', time.ctime())
    # Sequential execution: total time is the SUM of all sleeps (~6s here).
    for i in range(len(loops)):
        loop(i, loops[i])
    end_time = time.time()
    print('loop 结束运行', time.ctime())
    print(end_time - start_time)
运行结果:
loop 开始运行 Fri Oct 11 17:56:27 2019
开始:nloop: 0,time:Fri Oct 11 17:56:27 2019
结束:nloop: 0,time:Fri Oct 11 17:56:31 2019
开始:nloop: 1,time:Fri Oct 11 17:56:31 2019
结束:nloop: 1,time:Fri Oct 11 17:56:33 2019
loop 结束运行 Fri Oct 11 17:56:33 2019
6.001017093658447
1.2 多线程
import time
import threading

# Sleep durations (seconds) for each task.
loops = [4, 2]


def loop(nloop, nsec):
    """Sleep for nsec seconds, printing start/end timestamps for task nloop."""
    print('开始:nloop: %s,time:%s' % (nloop, time.ctime()))
    time.sleep(nsec)
    print('结束:nloop: %s,time:%s' % (nloop, time.ctime()))


def main():
    """Run every sleep in its own thread; total time ~= max(loops) (~4s).

    :return: elapsed wall-clock seconds for the whole run.
    """
    start_time = time.time()
    print('loop 开始运行', time.ctime())
    threads = []
    # Create one thread per task first ...
    for i in range(len(loops)):
        t = threading.Thread(target=loop, args=(i, loops[i], ))
        threads.append(t)
    # ... start them all so the sleeps overlap ...
    for i in range(len(loops)):
        threads[i].start()
    # ... then block until every thread has finished.
    for i in range(len(loops)):
        threads[i].join()
    end_time = time.time()
    print('loop 结束运行', time.ctime())
    return end_time - start_time


if __name__ == '__main__':
    result = main()
    print(result)
运行结果:
loop 开始运行 Fri Oct 11 17:51:04 2019
开始:nloop: 0,time:Fri Oct 11 17:51:04 2019
开始:nloop: 1,time:Fri Oct 11 17:51:04 2019
结束:nloop: 1,time:Fri Oct 11 17:51:06 2019
结束:nloop: 0,time:Fri Oct 11 17:51:08 2019
loop 结束运行 Fri Oct 11 17:51:08 2019
4.002340078353882
1.3 类实例创建多线程
from time import time, ctime, sleep
import threading

# Sleep durations (seconds) for each task.
loops = [4, 2]


class MyThread:
    """Callable wrapper bundling a function with its positional arguments.

    An instance can be passed as a Thread target; calling it applies the
    stored arguments to the stored function.
    """

    def __init__(self, func, args, name=''):
        self.name = name  # informational label (here: the function's name)
        self.func = func  # callable to invoke
        self.args = args  # tuple of positional args for func

    def __call__(self, *args, **kwargs):
        # Thread target entry point: ignore call-time args, use the stored ones.
        self.func(*self.args)


def loop(nloop, nsec):
    """Sleep for nsec seconds, printing start/end timestamps for task nloop."""
    print('开始:nloop: %s,time:%s' % (nloop, ctime()))
    sleep(nsec)
    print('结束:nloop: %s,time:%s' % (nloop, ctime()))


def main():
    """Run each task through a MyThread instance used as a Thread target.

    :return: elapsed wall-clock seconds for the whole run.
    """
    start_time = time()
    print('loop 开始运行', ctime())
    threads = []
    for i in range(len(loops)):
        t = threading.Thread(target=MyThread(loop, (i, loops[i]), loop.__name__))
        threads.append(t)
    for i in range(len(loops)):
        threads[i].start()
    for i in range(len(loops)):
        threads[i].join()
    end_time = time()
    print('loop 结束运行', ctime())
    return end_time - start_time


if __name__ == '__main__':
    result = main()
    print(result)
运行结果:
loop 开始运行 Fri Oct 11 18:08:23 2019
开始:nloop: 0,time:Fri Oct 11 18:08:23 2019
结束:nloop: 0,time:Fri Oct 11 18:08:27 2019
开始:nloop: 1,time:Fri Oct 11 18:08:27 2019
结束:nloop: 1,time:Fri Oct 11 18:08:29 2019
loop 结束运行 Fri Oct 11 18:08:29 2019
6.0009613037109375
1.4 线程池 ThreadPoolExecutor
利用 concurrent.futures.Future 来进行各种便捷的数据交互;异常处理也很方便——任务中抛出的异常会在 result() 中再次抛出。
import time
import threading
from concurrent import futures
from concurrent.futures import ThreadPoolExecutor


def task(n):
    """Simulated work: sleep briefly, report the worker thread, return n * 2."""
    time.sleep(0.5)
    # current_thread().name replaces the deprecated currentThread().getName().
    print('线程:%s,当前数字:%s' % (threading.current_thread().name, n))
    return n * 2


def fetch1():
    """Wait for the futures in submission order: ordered output."""
    with ThreadPoolExecutor(max_workers=5) as executor:
        future_list = [executor.submit(task, i) for i in range(12)]
        # result() blocks until THIS future finishes, so results print in
        # submit order; any exception raised inside the task is re-raised here.
        for future in future_list:
            print(future.result())
            # print(future.exception())


def fetch2():
    """Print each future as soon as it completes: unordered output."""
    with ThreadPoolExecutor(max_workers=5) as executor:
        future_list = [executor.submit(task, i) for i in range(12)]
        done_iter = futures.as_completed(future_list)  # generator
        for done in done_iter:
            print(done)


if __name__ == '__main__':
    # fetch1()
    fetch2()
4. grequests 库
- 实例化请求对象:
grequests.request(method, url, **kwargs)
- 发起请求获得响应:
grequests.map(requests, stream=False, size=None, exception_handler=None, gtimeout=None)
- size 参数可以控制并发的数量,一般最好是 50 -100
grequests.map
返回值属性:
>>> r = grequests.map(reqs, exception_handler=exception_handler)
>>> dir(r[0])
['__attrs__', '__bool__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__nonzero__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_content', '_content_consumed', '_next', 'apparent_encoding', 'close', 'connection', 'content', 'cookies', 'elapsed', 'encoding', 'headers', 'history', 'is_permanent_redirect', 'is_redirect', 'iter_content', 'iter_lines', 'json', 'links', 'next', 'ok', 'raise_for_status', 'raw', 'reason', 'request', 'status_code', 'text', 'url']
发现我们能够用得到有:text、url、links、json、status_code、headers
等
import grequests
from time import ctime, time


def exception_handler(request, exception):
    """Per-request error callback for grequests.map.

    :param request: the failed AsyncRequest (its .url is still available)
    :param exception: the exception that aborted the request
    :return: the failed URL wrapped in a list, so grequests.map() puts it
        into the results list in place of a Response.
    """
    print('请求错误', request, exception)
    print(request.url)
    return [request.url]


def test1():
    """Send a batch of GETs concurrently via grequests and print the results."""
    urls = [
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.google.com',
    ]
    print('开始请求:', ctime())
    # Build the (unsent) async requests first; map() then fires them all.
    reqs = [grequests.get(url, timeout=5) for url in urls]
    r = grequests.map(reqs, exception_handler=exception_handler)
    print(r)
    for i in r:
        print(type(i), ctime())
    return r


if __name__ == '__main__':
    test1()
运行结果:
开始请求: Mon Oct 14 17:25:38 2019
请求错误 <grequests.AsyncRequest object at 0x000001CCAC84ACC0> HTTPSConnectionPool(host='www.google.com', port=443): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.VerifiedHTTPSConnection object at 0x000001CCAC8EFE48>, 'Connection to www.google.com timed out. (connect timeout=5)'))
https://www.google.com
[<Response [200]>, <Response [200]>, <Response [200]>, <Response [200]>, <Response [200]>, <Response [200]>, ['https://www.google.com']]
<class 'requests.models.Response'> Mon Oct 14 17:25:43 2019
<class 'requests.models.Response'> Mon Oct 14 17:25:43 2019
<class 'requests.models.Response'> Mon Oct 14 17:25:43 2019
<class 'requests.models.Response'> Mon Oct 14 17:25:43 2019
<class 'requests.models.Response'> Mon Oct 14 17:25:43 2019
<class 'requests.models.Response'> Mon Oct 14 17:25:43 2019
<class 'list'> Mon Oct 14 17:25:43 2019
总结
- grequests.map(reqs, exception_handler=exception_handler):可以指定错误处理函数
- 在错误处理函数中可以通过 request.url 获取请求 URL
- 我们可以将有问题的 URL 返回,它会自动添加到 grequests.map() 返回值的最后面
参考文章:https://blog.csdn.net/cong_da_da/article/details/84325849
5. aiohttp + asyncio 异步 http
5.1 快速开始
pip install aiohttp -i https://pypi.douban.com/simple
获取网页:
import aiohttp
import asyncio


async def fetch(session, url):
    """GET *url* with the shared session and return the response body as text."""
    async with session.get(url) as response:
        content = await response.text()
        return content


async def main():
    """Open one ClientSession, fetch a single page, and print it."""
    async with aiohttp.ClientSession() as session:
        # await suspends here until fetch's coroutine returns its value.
        html = await fetch(session, 'http://python.org')
        print(html)


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
响应对象 response 方法或属性
['ATTRS', '__aenter__', '__aexit__', '__class__', '__del__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_body', '_cache', '_cleanup_writer', '_closed', '_connection', '_content_dict', '_content_type', '_continue', '_headers', '_history', '_loop', '_notify_content', '_parse_content_type', '_protocol', '_raw_headers', '_real_url', '_released', '_request_info', '_response_eof', '_session', '_source_traceback', '_stored_content_type', '_timer', '_traces', '_url', '_writer', 'charset', 'close', 'closed', 'connection', 'content', 'content_disposition', 'content_length', 'content_type', 'cookies', 'get_encoding', 'headers', 'history', 'host', 'json', 'links', 'method', 'raise_for_status', 'raw_headers', 'read', 'real_url', 'reason', 'release', 'request_info', 'start', 'status', 'text', 'url', 'url_obj', 'version', 'wait_for_close']
比较常用的有:
- url:请求的 URL
- status:响应 code,类似于 requests 库的 status_code
- text()、json():响应内容;如果响应不是 JSON 格式,调用 json() 会报错
其他 HTTP 请求
# Other HTTP verbs on an aiohttp ClientSession (fragments — each call must be
# awaited inside a coroutine with `session` in scope).
session.post('http://httpbin.org/post', data=b'data')
session.put('http://httpbin.org/put', data=b'data')
session.delete('http://httpbin.org/delete')
session.head('http://httpbin.org/get')
session.options('http://httpbin.org/get')
session.patch('http://httpbin.org/patch', data=b'data')
Tips:当请求超时或者发送错误时,返回值将为 None,应全面考虑
5.2 使用 asyncio Task发送多个请求
import asyncio
import time

import aiohttp


async def fetch(session, url):
    """GET *url* and return the response's final URL (a yarl.URL)."""
    async with session.get(url, timeout=30) as resp:
        # Await the body so the request fully completes before returning.
        text = await resp.text()
        return resp.url


async def main(urls):
    """Fetch all *urls* concurrently; map each final URL (as str) to itself.

    :param urls: iterable of URL strings to request
    :return: dict of {str(final_url): str(final_url)}
    """
    content = {}
    async with aiohttp.ClientSession() as session:
        # Put every request into its own task ...
        tasks = [fetch(session, url) for url in urls]
        # ... and handle each one as soon as it completes (finish order,
        # not submission order) — see section 3.3.3.
        for task in asyncio.as_completed(tasks):
            results = await task
            # resp.url is a <class 'yarl.URL'>; convert via str() for dict use.
            content[str(results)] = str(results)
            print('任务结果:%s,时间:%s' % (results, time.ctime()))
    return content


if __name__ == '__main__':
    urls = [
        'https://www.baidu.com',
        'https://www.douban.com',
        # 'https://www.google.com',
    ]
    loop = asyncio.get_event_loop()
    try:
        print('开始事件循环:', time.ctime())
        ret = loop.run_until_complete(main(urls))  # collect the coroutine result
        print('执行结果:', ret)
    except Exception as e:
        print('发送错误', e)
    finally:
        loop.close()
执行结果:
开始事件循环: Thu Oct 17 10:49:57 2019
任务结果:https://www.baidu.com,时间:Thu Oct 17 10:49:57 2019
任务结果:https://www.douban.com,时间:Thu Oct 17 10:49:57 2019
执行结果: {'https://www.baidu.com': 'https://www.baidu.com', 'https://www.douban.com': 'https://www.douban.com'}
5.3 错误:AssertionError: There is no current event loop in thread ‘Thread-1’
原因:
asyncio 程序中的每个线程都有自己的事件循环,但它只会在主线程中为你自动创建一个事件循环。所以如果你在主线程中调用一次 asyncio.get_event_loop,它将自动创建一个循环对象并将其设置为默认值;但是如果你在一个子线程中再次调用它,就会得到这个错误。相反,你需要在线程启动时显式创建/设置事件循环:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
原文链接:https://blog.csdn.net/qq_34367804/article/details/75046718
场景
Django 请求下获取事件循环,调用相应函数异步请求 HTTP,出现以上错误:
1、Project\app\views.py
import asyncio


class CheckPeerView(APIView):
    """Demo view (BROKEN variant): triggers 'no current event loop' off the
    main thread — the fixed version follows later in these notes."""

    def get(self, request, *args, **kwargs):
        return render(request, 'myadmin/acc_check_peer.html')

    def post(self, request, *args, **kwargs):
        # Call the checking helper.
        # NOTE(review): `loop` is undefined in this variant — part of why
        # this version fails; `xxx` is a placeholder from the notes.
        results = self.func(loop)
        return HttpResponse(json.dumps(xxx))

    def func(self, loop):
        # get_event_loop() raises AssertionError when called in a worker
        # thread that has no event loop set.
        loop = asyncio.get_event_loop()
        # Drive the async HTTP calls to completion.
        results = loop.run_until_complete(async_http(urls))
        loop.close()  # close the event loop
        return results
2、Project\utils\common\request_handel.py
import asyncio

import aiohttp


async def async_http(urls):
    """Fetch all *urls* concurrently and aggregate their results.

    :param urls: iterable of URL strings
    :return: xxx (placeholder in the original notes)
    """
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, url) for url in urls]
        # Process each task's result as soon as it completes.
        for task in asyncio.as_completed(tasks):
            content = await task
            if content:  # fetch() returns None when the request failed
                status, result, send_url = content
                if status == 200:
                    pass
                else:
                    pass
    return xxx


async def fetch(session, url):
    """GET *url*; return (status, json_body, url) or None on any error."""
    try:
        # NOTE(review): original read `reuturn_headers()` — typo for
        # return_headers(), a project helper not shown in this snippet.
        async with session.get(url, headers=return_headers(), timeout=30) as resp:
            json_data = await resp.json()
            return resp.status, json_data, resp.url
    except Exception as e:
        # Swallowing the error makes fetch() return None; async_http
        # checks for that falsy result above.
        pass
当有 post 请求过来时,调用 async_http()
方法,发生:AssertionError: There is no current event loop in thread ‘Thread-1’
错误
解决办法:
class CheckPeerView(APIView):
    """Fixed view: explicitly create/set an event loop per request thread."""

    def get(self, request, *args, **kwargs):
        return render(request, 'myadmin/acc_check_peer.html')

    def post(self, request, *args, **kwargs):
        # Worker threads have no default event loop — create and register
        # one explicitly (the two added lines).
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        # Call the checking helper with the fresh loop.
        results = self.func(loop)
        return HttpResponse(json.dumps(peer_status))

    def func(self, loop):
        # loop = asyncio.get_event_loop()  # removed: fails off the main thread
        # Drive the async HTTP calls to completion on the supplied loop.
        results = loop.run_until_complete(async_http())
        loop.close()  # release the per-request loop
        return results
5.4 URL 上传递参数
params = {'key1': 'value1', 'key2': 'value2'}
async with session.get('http://httpbin.org/get',
params=params) as resp:
expect = 'http://httpbin.org/get?key2=value2&key1=value1'
assert str(resp.url) == expect
参考文章:https://aiohttp.readthedocs.io/en/stable/client_quickstart.html
5.6 示例二:gather 收集所有的 Future 对象
# coding: utf-8
import asyncio
import json
import os
import time
import sys
import pandas as pd
import aiohttp
import requests
from datetime import datetime
# Static request headers used by Handle.fetch for every POST.
headers = {
'Content-Type': "application/x-www-form-urlencoded",
'cache-control': "no-cache",
'Postman-Token': "d2d27edd-6795-45be-a1ef-d4cd749f79c4"
}
class Handle:
    """Read MACs from an excel sheet, query a device API for each one
    concurrently (asyncio + aiohttp), and summarize the results."""

    def parser_args(self, command, excel_path):
        """
        Entry point: parse the arguments and kick off the run.
        :param command: command to send to each device
        :param excel_path: path to the excel file listing MAC addresses
        :return: None
        """
        self.read_excel(command, excel_path)

    def read_excel(self, command, excel_path):
        """Read the first column (MAC addresses) of the excel file and run."""
        df = pd.read_excel(excel_path)
        data = df.values
        mac_list = []
        for i in data:
            mac_list.append(i[0])  # column 0 holds the MAC address
        self.build_async(mac_list, command)

    def build_async(self, mac_list, command):
        """
        Build the coroutines and drive them on a fresh event loop.
        :param mac_list: list of MAC addresses
        :param command: command string
        :return: None
        """
        print('异步任务')
        start_time = time.time()
        # New loop + set_event_loop so this also works off the main thread.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self.async_http(mac_list, command))
        print('总耗时:', time.time() - start_time)

    async def async_http(self, mac_list, command):
        """Fan out one fetch per MAC, gather all results, and summarize."""
        try:
            url = "http://xxxx"
            tasks = []  # task list
            semaphore = asyncio.Semaphore(5)  # cap the concurrency at 5
            for mac in mac_list:
                tasks.append(self.fetch(url, command, mac, semaphore))
            # gather(*tasks) collects every future and waits for all results.
            responses = await asyncio.gather(*tasks)
            result = self.result_handle(responses)
            print('>>>', result)
        except Exception as e:
            # NOTE(review): swallowing every error hides failures — consider
            # at least logging e.
            pass

    async def fetch(self, url, command, mac, semaphore):
        """POST one device query; return (status, body_text, mac)."""
        async with semaphore:
            async with aiohttp.ClientSession() as session:
                # NOTE(review): `data` is undefined in this snippet —
                # presumably built from command/mac in code omitted here.
                async with session.post(url, data=data, headers=headers, timeout=30) as resp:
                    content = await resp.text()
                    return resp.status, content, mac

    def result_handle(self, responses):
        """
        Summarize the coroutine results.
        :param responses: list of (status, json_text, mac) tuples, e.g.
            [(200, '{"code":0,"data":"bin","message":"success"}', 'D4EE076436A4'), ...]
        :return: (result, online, offline, timeout) where result is a list of
            {mac: data} dicts for the online devices.
        """
        result = []
        online, offline, timeout = 0, 0, 0
        for i in responses:
            if i[0] == 200:  # non-200 responses are ignored entirely
                resp = json.loads(i[1])
                if resp.get("code") == 108 or resp.get("message") == "device offline":
                    # Device is offline.
                    offline += 1
                elif resp.get("code") == 101:
                    # Query timed out on the device side.
                    timeout += 1
                else:
                    result.append({
                        i[-1]: resp.get("data")
                    })
                    online += 1
        return result, online, offline, timeout
if __name__ == '__main__':
    h = Handle()
    arg_list = sys.argv
    # Expect exactly: script.py <command> <file.xlsx>
    if len(arg_list) == 3:
        if not arg_list[-1].endswith('.xlsx'):
            sys.exit('请执行 excel 格式文件!')
        num = 1
        # Repeat the whole run 19 times (num goes 1..19, stops at 20).
        while True:
            print('第 %s 次测试' % num)
            h.parser_args(arg_list[1], arg_list[-1])
            # time.sleep(5)
            print('-' * 50)
            num += 1
            if num == 20:
                break
    else:
        sys.exit('参数数目错误!')
6. Python 实现 requests 请求失败重试机制
原理:设置一个 retries 参数,每次发生异常时 retries 就减 1,并重新调用原函数发起请求,直至 retries 小于 0 为止:
import requests


def http_request(url, method, timeout=30, retries=5):
    """
    Issue an HTTP request, retrying on any exception.

    :param url: target URL
    :param method: HTTP method name, e.g. 'get'
    :param timeout: per-attempt timeout in seconds
    :param retries: remaining retry budget; decremented on each failure
    :return: response status code, or None when every attempt failed
    """
    try:
        resp = requests.request(method=method, url=url, timeout=timeout)
        print(resp.status_code)
    except Exception as e:
        print('e', e)
        if retries > 0:
            # BUG FIX: the retry previously hard-coded 'get' and timeout=30,
            # ignoring the caller's method/timeout; pass them through instead.
            return http_request(url, method, timeout=timeout, retries=retries - 1)
        else:
            print('req failed')
            return None
    else:  # no exception: return the status code
        return resp.status_code


if __name__ == '__main__':
    url = 'https://google.com'
    method = 'get'
    result = http_request(url=url, method=method)
    print('请求结果:', result)