标准
range(100000)(即 0~99999)的范围,过滤掉小于 3 的元素
同一 CPU,使用 Python 标准库 timeit 模块的 timeit() 函数,每种方法各执行 100 次并取总耗时
结论
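dropwhile()
速度最快,耗时约为 filter 和 filterfalse 的 1/3,pandas 最慢。dropwhile 之所以快,是因为它只在序列开头调用判断函数:遇到第一个不满足条件的元素后就不再判断,直接输出其余全部元素;而 filter 和 filterfalse 会对每个元素都调用一次判断函数。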
代码
import pandas as pd
from timeit import timeit
import matplotlib.pyplot as plt
def plot_bar(datas, xname='name', yname='time', sort=None):
    """Draw a labelled bar chart from a column-oriented mapping and show it.

    Args:
        datas: Mapping (e.g. a dict of lists) indexed by ``xname`` and
            ``yname``; ``datas[xname]`` supplies the bar positions/labels and
            ``datas[yname]`` the bar heights.
        xname: Key of the x-axis values in ``datas``.
        yname: Key of the bar heights in ``datas``; also used as the y-axis
            label.
        sort: Accepted for backward compatibility; currently unused.

    Side effects:
        Overwrites the global ``figure.dpi`` and ``figure.figsize`` rcParams
        and opens the figure with ``plt.show()``.
    """
    plt.rcParams['figure.dpi'] = 100  # resolution
    plt.rcParams['figure.figsize'] = 3, 4  # figure width, height
    bars = plt.bar(datas[xname], height=datas[yname])  # draw the bars

    # Anchor each value label to the centre of its own bar. Reading the
    # positions back from plt.xticks() only works when there is exactly one
    # tick per bar (categorical x); with numeric x the ticks need not line up
    # with the bars at all.
    for bar, value in zip(bars, datas[yname]):
        center = bar.get_x() + bar.get_width() / 2
        plt.text(center, value, '%.2f' % value, horizontalalignment='center')  # two decimals

    plt.xticks(rotation=45)  # rotate the x tick labels
    plt.ylabel(yname)
    plt.show()
# Benchmark results: 'name' holds the method labels and 'time' is filled, in
# the same order, with the total seconds timeit() reports for 100 executions.
cost = {'name': ['dropwhile', 'filter', 'filterfalse', 'pandas'], 'time': []}

# (statement, setup) pairs for the pure-Python contenders, in label order.
# Note that dropwhile only skips the leading run of elements below 3, whereas
# filter/filterfalse evaluate the predicate for every element.
_pure_python_cases = (
    ('list(dropwhile(lambda x: x<3, range(100000)))', 'from itertools import dropwhile'),
    ('list(filter(lambda x: x>=3, range(100000)))', 'pass'),
    ('list(filterfalse(lambda x: x<3, range(100000)))', 'from itertools import filterfalse'),
)
for _stmt, _setup in _pure_python_cases:
    t = timeit(_stmt, setup=_setup, number=100)
    cost['time'].append(t)
def pdtest(n=100000, threshold=3):
    """Filter a single-column DataFrame built from ``range(n)``.

    The DataFrame is constructed inside the function, so timing ``pdtest()``
    measures the construction as well as the boolean-mask filter.

    Args:
        n: Upper bound passed to ``range``; the defaults reproduce the
            benchmark's original 0..99999 input.
        threshold: Rows whose value in column 0 is below this are dropped.

    Returns:
        The filtered DataFrame (rows with column 0 >= ``threshold``).
    """
    df = pd.DataFrame(range(n))
    df = df[df[0] >= threshold]
    return df
# Time the pandas variant. The function object is handed to timeit directly
# instead of the string 'pdtest()' with setup='from __main__ import pdtest',
# which raises ImportError whenever this file is not executed as __main__
# (for example when it is imported from another module).
t = timeit(pdtest, number=100)  # pandas
cost['time'].append(t)

# Print the raw timings, then show them as a bar chart.
print(cost)
plot_bar(datas=cost, xname='name', yname='time')
注意
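itertools.dropwhile()
只会丢弃序列开头连续满足条件的元素,之后的元素即使满足条件也会被保留。因此它只适用于待过滤元素全部集中在序列开头的情况(例如已排序的序列);其他情况下结果与 filter / filterfalse 不同,应使用 filter 或 filterfalse。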