教你如何简化并加速python代码

最新推荐文章于 2024-03-22 13:47:23 发布

深褐色七分裤

最新推荐文章于 2024-03-22 13:47:23 发布

阅读量949

点赞数 36

文章标签： python 开发语言

本文链接：https://blog.csdn.net/qq_42735683/article/details/136240424

版权

本文详细介绍了Python中的优雅语句，包括赋值、判断、循环、函数以及如何通过使用集合数据结构、向量化操作、多线程和多进程等方式来加速代码执行。涵盖了元组解包、单行if语句、列表推导式、生成器、装饰器和pandas库的优化等知识点。

摘要由CSDN通过智能技术生成

一、优雅python语句

1. 赋值语句

使用元组语法同时赋值
```
# Bind three names in a single statement via tuple packing/unpacking.
a, b, c = 1, 2, 3
```

序列自动解包

# Unpack a sequence into one name per element (the counts must match).
ls = ['zhangsan', 18, 'male']
name, age, gender = ls

使用对象方法嵌套，减少中间变量

# Chain the str.replace calls so no intermediate variable is needed.
s = 'Python$$ is simple,$$readable **and powerful!**'
print(s.replace('$', '').replace('*', ''))

2. 判断语句

使用单行if语句：if…else…三目运算符
```
# Conditional expression: y is x when x >= 0, otherwise -x.
x = -5
y = x if x >= 0 else -x
```

使用链式判断

# Chained comparison: same as (60 <= score) and (score < 80).
score = 78
if 60 <= score < 80:
    level = 'C'
# NOTE(review): level is only bound inside the if-branch above.
print(level)

判断是否为多个取值之一，使用关键字in

# Test against several allowed values at once with `in`.
level = 'C'
if level in ('A', 'B', 'C'):
    status = 'pass'
# NOTE(review): status is only bound inside the if-branch above.
print(status)

判断是否为空列表、空字典、空字符串，利用隐含类型转换直接判断

# Empty lists, dicts and strings are falsy, so they can be tested directly.
l, d, s = [1, 2, 3], {}, ''
if l:
    print('l is not empty!')
if d:
    print('d is not empty!')
if s:
    print('s is not empty!')

判断诸多条件中是否至少有一个成立，利用any函数

# any() is true when at least one of the listed conditions holds.
math, physics, computer = 70, 40, 80
if any([math < 60, physics < 60, computer < 60]):
    print('not pass!')

判断诸多条件是否全部成立，利用all函数

# all() is true only when every listed condition holds.
math, physics, computer = 70, 70, 80
if all([math > 60, physics > 60, computer > 60]):
    print('pass!')

3. 循环语句

使用推导式：[… for … in … if …]

# Keep only the int/float elements of list l and sum them
l = [1, 2, 3, 'abc', 4, 5.0]
# The exact-type test skips the string 'abc'; the sum itself is not stored.
sum([i for i in l if type(i) in [int, float]])

利用enumerate函数同时遍历序列的元素和元素下标

# enumerate yields (index, element) pairs, so no manual counter is needed.
seasons = ['spring', 'summer', 'autumn', 'winter']
for i, s in enumerate(seasons):
    print(i, ':', s)

4. 函数

使用lambda匿名函数

# Keep only the numeric elements of l and sum them
l = [1, 2, 3, 'abc', 4, 5.0]
# The lambda is the filter predicate; the sum itself is not stored.
sum(filter(lambda x: isinstance(x, (int, float)), l))

使用yield生成器收集系列值

# Generate the first 10 terms of the Fibonacci sequence
def fibs(n):
    """Yield the first n Fibonacci numbers: 1, 1, 2, 3, 5, ..."""
    current, following = 1, 1
    for _ in range(n):
        yield current
        current, following = following, current + following
list(fibs(10))

使用装饰器给函数添加日志记录、性能测试等非核心功能

# Add timing to my_sum: print how long each call takes
import functools
import time
def runtime(func):
    """Decorate func so every call prints its name and elapsed seconds.

    The wrapper forwards all positional and keyword arguments to func and
    returns func's result unchanged.
    """
    @functools.wraps(func)  # keep func's __name__ and __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        tic = time.perf_counter()  # monotonic clock meant for intervals
        result = func(*args, **kwargs)
        toc = time.perf_counter()
        print('{} is called, {}s used'.format(func.__name__, toc - tic))
        return result
    return wrapper

@runtime
def my_sum(*args):
    """Return the sum of all positional arguments (0 when none are given)."""
    s = 0
    for i in args:
        s = s + i
    return s
# @runtime is equivalent to my_sum = runtime(my_sum)
my_sum(*range(10000))

二、加速python代码

1. 查找

用set而非list进行查找

# Materialise the values once: a generator would be exhausted by the first
# consumer and leave the second container empty.
data = [i**2 + 1 for i in range(1000000)]
list_data = list(data)
set_data = set(data)
# Slow: list membership compares the elements one by one
1098987 in list_data
# Fast: set membership is a hash lookup
1098987 in set_data

用字典dict而非两个list进行匹配查找

list_a = [2*i-1 for i in range(1000000)]
list_b = [i**2 for i in list_a]
# Map each element of list_a to the element of list_b at the same position
dict_ab = dict(zip(list_a, list_b))
# Slow: list.index has to search list_a for the position first
print(list_b[list_a.index(876567)])
# Fast: one dict lookup (None is returned for a missing key)
print(dict_ab.get(876567, None))

2. 循环

优先使用for循环而不是while循环

# Slow: while loop with a hand-maintained counter
s, i = 0, 0
while i < 10000:
    i = i + 1
    s = s + i
print(s)
# Fast: for loop over range; both versions sum 1..10000
s = 0
for i in range(1, 10001):
    s = s + i
print(s)

循环体中避免重复计算

a = [i**2+1 for i in range(2000)]
# Slow: sum(a) is recomputed for every element
b = [i/sum(a) for i in a]
# Fast: compute the invariant sum once, outside the comprehension
sum_a = sum(a)
b = [i/sum_a for i in a]

3. 函数

用循环机制代替递归函数

# Slow: plain recursion, two recursive calls per step
def fib(n):
    """Return the n-th Fibonacci number (fib(1) == fib(2) == 1), recursively."""
    return (1 if n in (1, 2) else fib(n-1) + fib(n-2))
print(fib(30))
# Fast: iterate upwards, keeping only the last two values
def fib(n):
    """Return the n-th Fibonacci number (fib(1) == fib(2) == 1), iteratively."""
    if n in (1, 2):
        return 1
    a, b = 1, 1
    for i in range(2, n):
        a, b = b, a + b
    return b
print(fib(30))

用缓存机制加速递归函数

# Slow: plain recursion, two recursive calls per step
def fib(n):
    """Return the n-th Fibonacci number (fib(1) == fib(2) == 1), recursively."""
    return (1 if n in (1, 2) else fib(n-1) + fib(n-2))
print(fib(30))
# Fast: same recursion, but results are cached (maxsize is 100 entries)
from functools import lru_cache
@lru_cache(100)
def fib(n):
    """Return the n-th Fibonacci number, reusing cached results."""
    return (1 if n in (1, 2) else fib(n-1) + fib(n-2))
print(fib(30))

4. 使用标准库模块collections加速

使用collections.Counter加速计数

data = [x**2%1989 for x in range(2000000)]
# Slow: count occurrences by hand in a plain dict
values_count = {}
for i in data:
    i_cnt = values_count.get(i, 0)
    values_count[i] = i_cnt + 1
print(values_count.get(4, 0))
# Fast: Counter builds the same value -> count mapping in one call
from collections import Counter
values_count = Counter(data)
print(values_count.get(4, 0))

使用collections.ChainMap加速字典合并

dict_a = {i:i+1 for i in range(1, 1000000, 2)}
dict_b = {i+2:i+1 for i in range(1, 1000000, 3)}
dict_c = {i+3:i+1 for i in range(1, 1000000, 5)}
dict_d = {i+4:i+1 for i in range(1, 1000000, 7)}
# Slow: copy + update builds a brand-new merged dict; later dicts win
result = dict_a.copy()
result.update(dict_b)
result.update(dict_c)
result.update(dict_d)
# Fast: ChainMap only wraps the dicts, nothing is copied
from collections import ChainMap
# ChainMap returns the value from the FIRST mapping that has the key, while
# update() lets LATER dicts win, so the dicts are listed in reverse order to
# make both approaches give the same lookups for keys shared between dicts.
chain = ChainMap(dict_d, dict_c, dict_b, dict_a)

5. 使用numpy向量化加速

使用np.array代替list

# Slow: element-wise arithmetic in a Python-level loop
a = range(1, 1000000, 3)
# A descending range needs start > stop (with start < stop and a negative
# step the range is empty); this one has the same length as a.
b = range(1000000, 1, -3)
c = [x * 3 - y * 2 for x, y in zip(a, b)]
# Fast: one vectorised NumPy expression over whole arrays
import numpy as np
array_a = np.arange(1, 1000000, 3)
array_b = np.arange(1000000, 1, -3)
array_c = 3 * array_a - 2 * array_b

使用np.ufunc代替math.func

# Slow: call math.log once per element in a Python loop
import math
a = range(1, 1000000, 3)
b = [math.log(x) for x in a]
# Fast: np.log is applied to the whole array in one call
import numpy as np
array_a = np.arange(1, 1000000, 3)
array_b = np.log(array_a)

使用np.where代替if

import numpy as np
array_a = np.arange(-100000, 100000)
# Slow: the Python lambda is still called once per element
# np.vectorize wraps a plain function so that it accepts arrays
relu = np.vectorize(lambda x: x if x > 0 else 0)
array_b = relu(array_a)
# Fast: np.where picks x where x > 0 and 0 elsewhere, on the whole array
relu = lambda x: np.where(x > 0, x, 0)
array_b = relu(array_a)

6. 加速pandas

使用np.ufunc函数代替applymap

import numpy as np
import pandas as pd
# 100000 rows x 26 columns of random integers in [-10, 10]
df = pd.DataFrame(np.random.randint(-10, 11, size=(100000, 26)), columns=list('abcdefghijklmnopqrstuvwxyz'))
# Slow: applymap calls the lambda once per cell
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in
# favour of DataFrame.map - confirm against the pandas version in use.
result = df.applymap(lambda x: np.sin(x) + np.cos(x))
# Fast: apply the NumPy ufuncs to the whole DataFrame at once
result = np.sin(df) + np.cos(df)

使用预分配存储代替动态扩容

import numpy as np
import pandas as pd
# Slow: start from an empty frame, so every new row label enlarges it
df = pd.DataFrame(columns=list('abcdefghijklmnopqrstuvwxyz'))
for i in range(10000):
    df.loc[i, :] = range(i, i+26)
# Fast: allocate all 10000 rows up front, then only overwrite existing rows
df = pd.DataFrame(np.zeros((10000, 26)), columns=list('abcdefghijklmnopqrstuvwxyz'))
for i in range(10000):
    df.loc[i, :] = range(i, i+26)

使用csv文件读写代替excel文件读写

# NOTE(review): df is not defined in this snippet; presumably it is the
# DataFrame built in the previous example - confirm before running alone.
# Slow: write an Excel workbook
df.to_excel('data.xlsx')
# Fast: write a plain-text CSV file
df.to_csv('data.csv')

使用pandas多进程工具pandarallel

import numpy as np
import pandas as pd
# 100000 rows x 26 columns of random integers in [-10, 10]
df = pd.DataFrame(np.random.randint(-10, 11, size=(100000, 26)), columns=list('abcdefghijklmnopqrstuvwxyz'))
# Slow: row-wise apply (axis=1) in a single process
result = df.apply(np.sum, axis=1)
# Fast: the same row-wise apply through pandarallel (third-party package),
# initialised here with 4 workers
from pandarallel import pandarallel
pandarallel.initialize(nb_workers=4)
result = df.parallel_apply(np.sum, axis=1)

7. 应用多线程多进程加速

应用多线程加速IO密集型任务

def writefile(i):
    """Write 'hello <i>' repeated 1000000 times to '<i>.txt' in the cwd."""
    with open(str(i) + '.txt', 'w') as f:
        s = ('hello %d' % i) * 1000000
        f.write(s)
# Slow (serial): write the files one after another
for i in range(10):
    writefile(i)
# Fast (multithreaded): one thread per file so the writes can overlap
import threading
thread_list = []
for i in range(10):
    # Set the daemon flag through the constructor: calling a bare setDaemon()
    # is a NameError, and Thread.setDaemon itself is deprecated.
    t = threading.Thread(target=writefile, args=(i,), daemon=True)
    thread_list.append(t)
for t in thread_list:
    t.start()  # start the thread
for t in thread_list:
    t.join()  # wait for the worker thread to finish

应用多进程加速CPU密集型任务

import time
def muchjob(x):
    """Simulate a slow job: sleep for 5 seconds, then return x squared."""
    time.sleep(5)
    return x**2
# The guard is required for multiprocessing: with the spawn start method every
# worker re-imports this module, and unguarded top-level code would run the
# serial loop and create a new pool again inside each worker.
if __name__ == '__main__':
    # Slow (serial): run the jobs one by one
    ans = [muchjob(i) for i in range(8)]
    # Fast (multiprocess): spread the jobs over a pool of 4 worker processes
    import multiprocessing
    data = range(8)
    pool = multiprocessing.Pool(processes=4)
    result = []
    for i in data:
        result.append(pool.apply_async(muchjob, (i,)))
    pool.close()  # no more jobs will be submitted
    pool.join()  # wait for all workers to finish
    ans = [res.get() for res in result]