"""
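Interview algorithm question: pick the top 1000 values out of 1 billion numbers.
Min-heap property: every node is no larger than its left and right children.
First take the first N numbers and build a min-heap from them, i.e. keep a
min-heap of 1000 numbers in memory.
Then read the remaining data from the file and compare each value with the heap top:
- if it is not larger than the heap top, discard it;
- if it is larger than the heap top, replace the root and re-adjust the heap
  so that the min-heap property is restored.
Once all data has been processed, the heap holds the Top-N values.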
"""
class TopN:
    """Select the N largest values of a list with a fixed-size min-heap.

    The first ``n`` slots of the list are arranged as a binary min-heap, so
    the root (index 0) is always the smallest value kept so far. Every
    remaining value is compared with the root and swapped into the heap when
    it is larger. All work happens in place on the caller's list.
    """

    def parent(self, n):
        """Return the index of the parent of heap node ``n``.

        Integer floor division is used so that very large indices are not
        rounded by a float division. The root (``n == 0``) is reported as its
        own parent.
        """
        return max(n - 1, 0) // 2

    def left(self, n):
        """Return the index of the left child of heap node ``n``."""
        return 2 * n + 1

    def right(self, n):
        """Return the index of the right child of heap node ``n``."""
        return 2 * n + 2

    def buildHeap(self, n, data):
        """Arrange ``data[:n]`` in place as a min-heap.

        Elements are inserted one at a time and sifted up towards the root
        while they are smaller than their parent.

        Args:
            n: Number of leading elements that form the heap. A value larger
                than ``len(data)`` is treated as ``len(data)``.
            data: Mutable sequence of mutually comparable values.
        """
        size = min(n, len(data))
        for i in range(1, size):
            t = i
            while t != 0 and data[t] < data[self.parent(t)]:
                p = self.parent(t)
                data[t], data[p] = data[p], data[t]
                t = p

    def adjust(self, i, n, data):
        """Offer ``data[i]`` to the min-heap stored in ``data[:n]``.

        When ``data[i]`` is larger than the heap root the two values are
        swapped (the old root ends up at index ``i``) and the new root is
        sifted down until the min-heap property holds again. Otherwise the
        list is left untouched.

        Args:
            i: Index of the candidate value.
            n: Heap size; expected to satisfy ``1 <= n <= len(data)``.
            data: Mutable sequence whose first ``n`` items form a min-heap.
        """
        if data[i] <= data[0]:
            return
        data[i], data[0] = data[0], data[i]
        t = 0
        while True:
            # Pick the smallest of the node and its existing children; on a
            # tie between the children the left one wins.
            smallest = t
            lo = self.left(t)
            hi = self.right(t)
            if lo < n and data[lo] < data[smallest]:
                smallest = lo
            if hi < n and data[hi] < data[smallest]:
                smallest = hi
            if smallest == t:
                break
            data[t], data[smallest] = data[smallest], data[t]
            t = smallest

    def findTopN(self, n, data):
        """Move the ``n`` largest values of ``data`` into ``data[:n]``.

        The list is rearranged in place. Afterwards its first ``n`` slots
        hold the largest values in heap order (not sorted) and the remaining
        slots hold the other values.

        Args:
            n: How many of the largest values to keep. Values above
                ``len(data)`` are clamped to ``len(data)``; values of zero or
                below leave ``data`` unchanged.
            data: Mutable sequence of mutually comparable values.

        Returns:
            The same ``data`` object that was passed in.
        """
        n = max(0, min(n, len(data)))
        if n == 0:
            # No heap to maintain: nothing may be moved.
            return data
        self.buildHeap(n, data)
        for i in range(n, len(data)):
            self.adjust(i, n, data)
        return data
import random

# Demo 1: fixed input; the 5 largest values end up in the first 5 slots.
arr1 = [58, 26, 45, 18, 22, 39, 96, 75, 80, 65, 63, 28]
print("原数组:", arr1, sep="")
topn = TopN()
result = topn.findTopN(5, arr1)
print("数组进行Top-N调整:", result, sep="")

# Demo 2: 20 random integers in [0, 1000]; the heap-based Top-N is compared
# with the result of a plain descending sort.
N = 10
tempList = []
for i in range(20):
    tempList.append(random.randint(0, 1000))
print("原数组:", tempList, sep="")
topn = TopN()
result = topn.findTopN(N, tempList)
# The heap slots are not ordered, so sort a copy of them for display.
temp = sorted(result[:N], reverse=True)
print("数组进行Top-{0}调整:{1}".format(N, temp))
# Reference answer: sort the whole list and take its first N items.
tempList.sort(reverse=True)
print("数组进行Top-{0}排序:{1}".format(N, tempList[:N]))
# Sample output of the script above (the second input list is random, so its
# values differ from run to run):
# 原数组:[58, 26, 45, 18, 22, 39, 96, 75, 80, 65, 63, 28]
# 数组进行Top-N调整:[63, 65, 80, 75, 96, 18, 22, 26, 39, 45, 58, 28]
# 原数组:[676, 931, 685, 823, 889, 428, 681, 445, 497, 940, 113, 102, 299, 224, 811, 157, 889, 41, 959, 250]
# 数组进行Top-10调整:[959, 940, 931, 889, 889, 823, 811, 685, 681, 676]
# 数组进行Top-10排序:[959, 940, 931, 889, 889, 823, 811, 685, 681, 676]