1、耗时装饰器
import time
def decorate(func):
    """Wrap ``func`` so that every call prints how long it took.

    The wrapper forwards all positional and keyword arguments to ``func``
    and returns whatever ``func`` returns. The elapsed time is printed
    after ``func`` returns; nothing is printed if ``func`` raises.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same call signature and metadata as ``func``.
    """
    # Imported locally so this snippet is self-contained.
    from functools import wraps

    @wraps(func)  # keep func's __name__/__doc__ on the wrapper
    def inner(*args, **kwargs):
        # perf_counter is monotonic and high-resolution, unlike time.time,
        # which can jump when the system clock is adjusted.
        begin = time.perf_counter()
        result = func(*args, **kwargs)
        end = time.perf_counter()
        print(f'函数{func}耗时{end - begin}')
        return result

    return inner
2、查看代码运行耗时
from line_profiler import LineProfiler
def operation1():
    """Increment a counter 10000 times using a ``for`` loop; returns nothing."""
    counter = 0
    for _ in range(10000):
        counter += 1
def operation2():
    """Increment a counter up to 10000 using a ``while`` loop; returns nothing."""
    counter = 0
    while counter < 10000:
        counter += 1
# Script entry point: the guard must compare the dunder name ``__name__``
# with "__main__" (the bare ``name == "main"`` form raises NameError).
if __name__ == "__main__":
    # Register both functions so per-line timings are recorded for each.
    lprofiler = LineProfiler(operation1, operation2)
    lprofiler.run('operation1()')
    lprofiler.run('operation2()')
    # Print the collected line-by-line statistics.
    lprofiler.print_stats()
2、匹配文件路径列表
(1)glob方法:
# glob模块的主要方法就是glob,该方法返回所有匹配的文件路径列表(list);该方法需要一个参数用来指定匹配的路径字符串(字符串可以为绝对路径也可以为相对路径),其返回的文件名只包括当前目录里的文件名,不包括子文件夹里的文件。
import glob

# Example: collect the .jpg files located directly inside E:\pic.
# NOTE(review): the pasted pattern read ``E:\pic**.jpg`` with typographic
# quotes; it is reconstructed here from the stated intent ("all jpg files in
# the given directory") -- confirm the intended directory depth.
jpg_files = glob.glob(r'E:\pic\*.jpg')
# Collect the .py files in the parent directory (relative pattern).
py_files = glob.glob(r'../*.py')
(2)iglob方法:
import glob

# iglob returns an iterator, so matching paths are produced one at a time
# instead of being collected into a list first.
f = glob.iglob(r'../*.py')
for py in f:
    print(py)
3、使用缓存加快速度,将数据缓存到内存
from functools import lru_cache
@lru_cache(maxsize=None)
def add(x, y):
    """Return ``x + y``, memoised in memory.

    The "calculating" line is printed only when the sum is actually
    computed; a repeated call with the same arguments is answered from the
    cache without printing.
    """
    print("calculating: %s + %s" % (x, y))
    total = x + y
    return total
# Call add three times; the second call repeats the first call's arguments.
for left, right in ((1, 2), (1, 2), (2, 3)):
    print(add(left, right))
4、使用缓存加快速度,数据缓存到磁盘上
import os
import uuid
import pickle
import shutil
import tempfile
from functools import wraps as func_wraps
class DiskCache(object):
    """Decorator object that caches a function's return values on disk.

    Each distinct call (function name + arguments) is stored as one pickle
    file inside ``cache_path``.

    NOTE: cached values are read back with ``pickle.load``, which can execute
    arbitrary code from a tampered file. Only point ``cache_path`` at a
    directory that untrusted users cannot write to.

    Args:
        cache_path: Directory for the cache files. When falsy, a
            ``.diskcache`` directory inside the system temp dir is used.
    """

    # Fixed namespace so the same arguments always hash to the same UUID.
    _NAMESPACE = uuid.UUID("c875fb30-a8a8-402d-a796-225a6b065cad")

    def __init__(self, cache_path=None):
        if cache_path:
            self.cache_path = os.path.abspath(cache_path)
        else:
            self.cache_path = os.path.join(tempfile.gettempdir(), ".diskcache")

    def __call__(self, func):
        """Return a wrapped version of ``func`` backed by the disk cache.

        On a cache miss the function is called, its result is stored
        (best effort) and returned. On a cache hit the stored result is
        returned without calling the function.
        """
        @func_wraps(func)
        def wrapper(*args, **kw):
            cache_file = self._cache_file(func.__name__, args, kw)
            hit, val = self._load(cache_file)
            if hit:
                return val
            # Called outside any ``except`` block so that errors raised by
            # func are not chained to the cache-miss exception.
            val = func(*args, **kw)
            self._store(cache_file, val)
            return val
        return wrapper

    def _cache_file(self, func_name, args, kw):
        """Build the cache file path for one call of ``func_name``."""
        # Sort keyword arguments so f(a=1, b=2) and f(b=2, a=1) share a key.
        ordered_kw = dict(sorted(kw.items()))
        params_uuid = uuid.uuid5(
            self._NAMESPACE, "-".join(map(str, (args, ordered_kw))))
        key = '{}-{}.cache'.format(func_name, str(params_uuid))
        return os.path.join(self.cache_path, key)

    @staticmethod
    def _load(cache_file):
        """Return ``(True, value)`` on a cache hit, ``(False, None)`` otherwise."""
        try:
            with open(cache_file, 'rb') as f:
                return True, pickle.load(f)
        except Exception:
            # A missing, unreadable or corrupt file is treated as a miss;
            # unpickling can fail with many exception types.
            return False, None

    def _store(self, cache_file, val):
        """Write ``val`` to ``cache_file``; failures are deliberately ignored."""
        tmp_path = None
        try:
            os.makedirs(self.cache_path, exist_ok=True)
            # Write to a temp file first and rename it into place, so a
            # failed or interrupted dump never leaves a truncated cache file.
            fd, tmp_path = tempfile.mkstemp(dir=self.cache_path, suffix=".tmp")
            with os.fdopen(fd, 'wb') as f:
                pickle.dump(val, f)
            os.replace(tmp_path, cache_file)
        except Exception:
            # Caching is best effort: an unpicklable value or an unwritable
            # directory must not break the decorated function.
            if tmp_path is not None:
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass

    def clear(self, func_name):
        """Remove the cached results of every call to ``func_name``."""
        # Nothing to clear if the cache directory was never created.
        if not os.path.isdir(self.cache_path):
            return
        prefix = func_name + "-"
        for cache_file in os.listdir(self.cache_path):
            if cache_file.startswith(prefix):
                os.remove(os.path.join(self.cache_path, cache_file))

    def clear_all(self):
        """Remove the whole cache directory, if it exists."""
        if os.path.exists(self.cache_path):
            shutil.rmtree(self.cache_path)
# Cache instance created with no explicit path.
cache_in_disk = DiskCache()


@cache_in_disk
def add(x, y):
    """Return ``x + y``; calls go through the ``cache_in_disk`` decorator."""
    total = x + y
    return total
5、python如何判断两个数组(numpy 数组)完全相等(注意:普通 list 的 == 直接返回布尔值,没有 .all()/.any() 方法)
1、全部对应元素相等:
(aLst==bLst).all()
2、有任何一个对应元素相等:
(aLst==bLst).any()
6、计算词汇
# 统计词频
from collections import defaultdict
frequency = defaultdict(int)
# Walk every token of every text and count its occurrences.
for token in (tok for text in texts for tok in text):
    frequency[token] += 1

# Keep only the tokens that occur more than once across all texts.
processed_corpus = []
for text in texts:
    processed_corpus.append([token for token in text if frequency[token] > 1])
7、获取sh脚本的echo值
# Run the script and capture what it writes to stdout (e.g. via echo).
# The ``with`` block closes the pipe once the output has been read.
with os.popen('bash test.sh') as pipe:
    # Strip newline characters from both ends of the captured output.
    value = pipe.read().strip('\n')
8、多链异步(python异步编程之asyncio(百万并发) - 三只松鼠 - 博客园、非常适合小白的 Asyncio 教程_静觅-CSDN博客)
import time
import asyncio
from aiohttp import ClientSession
# URL template; the ``{}`` placeholder is filled in with str.format.
url = "https://www.baidu.com/{}"
# Module-level list holding the scheduled request tasks.
tasks = list()
async def hello(url):
    """GET ``url`` through a new ``ClientSession`` and return the response body.

    Prints a "Hello World" line with the current timestamp once the response
    is available, then reads and returns the body.
    """
    async with ClientSession() as session, session.get(url) as response:
        print('Hello World:%s' % time.time())
        body = await response.read()
        return body
def run():
    """Fetch five URLs concurrently, print the response bodies and return them.

    The coroutines are scheduled inside an event loop managed by
    ``asyncio.run``, so this function no longer depends on a module-level
    ``loop`` variable and can be called from any importing module.

    Returns:
        The list of results from ``hello``, in request order.
    """
    async def _fetch_all():
        # Tasks must be created while the loop is running; creating them
        # beforehand relies on the deprecated implicit event loop.
        batch = [asyncio.ensure_future(hello(url.format(i))) for i in range(5)]
        # Still recorded in the module-level list, as before.
        tasks.extend(batch)
        # Gather only this call's tasks, so repeated calls do not wait on
        # tasks that belong to an earlier, already closed loop.
        return await asyncio.gather(*batch)

    result = asyncio.run(_fetch_all())
    print(result)
    return result


if __name__ == '__main__':
    run()
9、性能耗时分析
from line_profiler import LineProfiler
def do_profile(func):
    """Decorator that line-profiles ``func`` on every call.

    Each call creates a fresh ``LineProfiler`` for ``func``, runs the
    function with profiling enabled and prints the statistics afterwards,
    also when ``func`` raises.

    Args:
        func: The function to profile.

    Returns:
        A wrapper that forwards all arguments to ``func`` and returns its
        result.
    """
    # Imported locally so this snippet is self-contained.
    from functools import wraps

    @wraps(func)  # keep func's __name__/__doc__ on the wrapper
    def profiled_func(*args, **kwargs):
        profiler = LineProfiler(func)
        try:
            profiler.enable()
            return func(*args, **kwargs)
        finally:
            # Always stop profiling and report, even on an exception.
            profiler.disable()
            profiler.print_stats()

    return profiled_func