Python 自带 heapq[1] 模块实现堆(heap),是小顶堆(min heap),元素可以是简单的数据类型(数字、字符串),也可以是 tuple/list,不过要保证同类型,如不能同时出现 tuple 和 list。
当元素是 tuple/list,元素比较就像字典序一样,此时要求每个元素的比较都定义好,不然可能会报错(如 numpy.ndarray 的比较)。
简例:
import pprint, heapq, copy
import numpy as np
# import medpy.io as medio
# from PIL import Image
# import skimage
import heapq
print('\t1, build a heap')
x = [1, 0.2, -3.4, 5., .6, -.7, -8., 9]#, "10"]
heapq.heapify(x)
while len(x) > 0:
t = heapq.heappop(x)
print(t)
data = [
(1.2, 17, 'tom', np.random.randn(3)),
(1.2, 18, 'jerry', np.random.randn(3)),
(1.2, 18, 'spike', np.random.randn(3)),
# (1.2, 18, 'spike', np.random.randn(3)), # error, `<` of numpy.ndarray not well defined
(1.2, 18, 'spike'), # 短一点
(3.4, 18, 'Toodles Galore', np.random.randn(3)),
(5.6, 19, 'tara', np.random.randn(3)),
# [1.2, 10, 'tuffy', np.random.randn(3)] # error, should be same type
]
data = [list(datum) for datum in data] # OK
print('\t2, build & maintain a heap')
x = []
for datum in data:
if len(x) > 3:
t = heapq.heappushpop(x, datum)
print(t, len(x))
else:
heapq.heappush(x, datum)
while len(x) > 0:
t = heapq.heappop(x)
print(t, len(x))
print('\t3, heapq.merge 错用')
y, z = [], []
for i in range(len(data) // 2):
heapq.heappush(y, data[i])
for i in range(len(data) // 2, len(data)):
heapq.heappush(z, data[i])
# 不能这么 heapq.merge 两个 heap
# 因为 heapq.merge 假设所有要 merge 的序列是 sort 过的
# 而 heap sort 的顺序与 sort 不同!
for t in heapq.merge(y, z):
print(t)
输出:
1, build a heap
-8.0
-3.4
-0.7
0.2
0.6
1
5.0
9
2, build & maintain a heap
[1.2, 17, 'tom', array([-1.21815652, 0.49500269, 0.8530528 ])] 4
[1.2, 18, 'jerry', array([0.25093173, 0.28760793, 0.22501419])] 4
[1.2, 18, 'spike'] 3
[1.2, 18, 'spike', array([0.8459503 , 0.72552735, 0.71327913])] 2
[3.4, 18, 'Toodles Galore', array([0.58576024, 0.3357778 , 1.52392345])] 1
[5.6, 19, 'tara', array([-1.28955814, -2.23286344, -1.25304415])] 0
3, wrong usage
[1.2, 17, 'tom', array([-1.21815652, 0.49500269, 0.8530528 ])]
[1.2, 18, 'jerry', array([0.25093173, 0.28760793, 0.22501419])]
[1.2, 18, 'spike']
[1.2, 18, 'spike', array([0.8459503 , 0.72552735, 0.71327913])]
[3.4, 18, 'Toodles Galore', array([0.58576024, 0.3357778 , 1.52392345])]
[5.6, 19, 'tara', array([-1.28955814, -2.23286344, -1.25304415])]