Python3中in操作
在列表,字典,集合中的速度对比
本实例旨在探索单个元素在大数据容器中的查找速度。另外,还有针对单个资源的放大版实例(点击传送门查看)。
结论:对于需要进行大量in查找操作的一批数据,最好使用字典或集合:二者基于哈希表,平均查找时间为O(1);而列表和数组需要逐个元素比较,查找时间随数据量线性增长。
运行结果:
import datetime
import matplotlib.pyplot as mp
import numpy as np
import hashlib
def hex_sha1(STR):
    """Return the upper-case hexadecimal SHA-1 digest of a string.

    Args:
        STR: Text to hash; it is encoded as UTF-8 before hashing.

    Returns:
        The 40-character digest in upper-case hexadecimal.
    """
    # The constructor accepts the initial data directly, so no separate
    # update() call is needed.
    return hashlib.sha1(STR.encode('utf8')).hexdigest().upper()
# print(hex_sha1('E5AC7D06E7C9A0F5'))
# E18937A9639FE98F8A24348ED0BBE3061A5465A8
def f(xx, iters):
    """Time a single ``xx in iters`` membership test and report it on stdout.

    Args:
        xx: The element to look for.
        iters: The container to search; it must support ``len()`` and ``in``.

    Returns:
        The elapsed wall-clock time of the membership test in whole
        microseconds, including any full seconds.
    """
    # Time only the membership test; the diagnostic prints are emitted
    # afterwards so console I/O does not inflate the measurement.
    t0 = datetime.datetime.now()
    found = xx in iters
    t1 = datetime.datetime.now()
    elapsed = t1 - t0

    print('查找开始', t0)
    print('类型', type(iters), xx, len(iters))
    print(found)
    print('查找结束', t1)
    print('耗时', elapsed.seconds, elapsed.microseconds)

    # timedelta.microseconds is only the sub-second component (0..999999), so
    # returning it alone would wrap for lookups longer than one second.
    # Dividing by a one-microsecond timedelta yields the full duration as int.
    return elapsed // datetime.timedelta(microseconds=1)
# Largest container size to benchmark; the key pool holds sed + 1 hashes.
sed = 10**7

# 1) Prepare the data first. The two timestamps bracket the build so that its
#    duration can be read off the console.
print(datetime.datetime.now())
L_all = np.array([
    hex_sha1(str(index)) * 2  # doubled digest -> 80-character key
    for index in range(sed + 1)
])
print(datetime.datetime.now())
# X collects the benchmarked sizes; y collects one row of timings per size,
# ordered [array, list, dict, set] to match the builders below.
X, y = [], []

# Each builder turns a slice of the key pool into the container under test.
# Plain callables replace the original eval() of expression strings.
builders = (
    lambda keys: keys,  # the NumPy array slice itself
    list,
    dict.fromkeys,      # values default to None
    set,
)

for x in range(0, sed + 1, sed // 10):
    X.append(x)
    print()
    keys = L_all[:x + 1]
    # Always look up the last key of the slice; hash it once per size
    # instead of once per container.
    target = hex_sha1(str(x)) * 2
    # The container is a temporary, so it is released as soon as f returns
    # and before the next one is built.
    Ly = [f(target, build(keys)) for build in builders]
    y.append(Ly)
# Scale the timing table by 1/1000 (microseconds -> milliseconds, matching
# the axis label) and transpose it so each row is one container's series.
y1, y2, y3, y4 = (np.array(y) / 1000).T

# One entry per container type: (timings, legend label, colour).
series = (
    (y1, "array", 'green'),
    (y2, "list", 'red'),
    (y3, "dict", 'blue'),
    (y4, "set", 'orange'),
)

# The loop variable is deliberately not called `y`, so the module-level
# timing table is no longer overwritten while plotting.
for timings, label, color in series:
    mp.plot(X, timings, linestyle='-', label=label, color=color)
    mp.scatter(
        X, timings,
        marker='o',         # marker shape, see matplotlib.markers
        s=60,               # marker size
        edgecolor=color,    # outline colour
        facecolor='white',  # fill colour
        zorder=3,           # higher zorder is drawn on top
    )

mp.title('speed test', fontsize=20)
mp.xlabel('Num', fontsize=12)
mp.ylabel('Time(ms)', fontsize=12)
mp.legend()
mp.show()