题目:如何得到一个数据流中的中位数?如果从数据流中读出奇数个数值,那么中位数就是所有数值排序之后位于中间的数值。如果从数据流中读出偶数个数值,那么中位数就是所有数值排序之后中间两个数的平均值。我们使用Insert()方法读取数据流,使用GetMedian()方法获取当前读取数据的中位数。
思路:使用两个堆(一个最大堆,一个最小堆),各存储一半的数:最大堆存储较小的一半,最小堆存储较大的一半。当已读取的数据个数为偶数时,中位数为两个堆顶的平均值;为奇数时,中位数为最大堆的堆顶。(也可以是最小堆的堆顶,取决于具体实现中两个堆的元素个数如何分配。)
# -*- coding:utf-8 -*-
class Solution:
def __init__(self):
self.maxheap = []
self.minheap = []
# 最大堆的调整
def heapq_max(self, numlist, n, i):
largest = i
l = 2*i+1
r = 2*i+2
if l<n and numlist[l] > numlist[i]:
largest = l
if r<n and numlist[r] > numlist[largest]:
largest = r
if largest != i:
numlist[i], numlist[largest] = numlist[largest], numlist[i]
self.heapq_max(numlist, n, largest)
# 最小堆的调整
def heapq_min(self, numlist, n, i):
smallest = i
l = 2*i+1
r = 2*i+2
if l<n and numlist[l] < numlist[i]:
smallest = l
if r<n and numlist[r] < numlist[smallest]:
smallest = r
if smallest != i:
numlist[i], numlist[smallest] = numlist[smallest], numlist[i]
self.heapq_max(numlist, n, smallest)
def Insert(self, num):
# write code here
# 当最大堆为空,即第一个数字进来时
if len(self.maxheap) == 0:
self.maxheap.append(num)
# 当最小堆为空,即第二个数字进来时
elif len(self.minheap) == 0:
self.minheap.append(max(self.maxheap[0], num))
self.maxheap[0] = min(self.maxheap[0], num)
# 当进来的数 大于最大堆的堆顶,小于最小堆的堆顶,查看最大堆和最小堆的size,决定将新进来的数加入最大堆或最小堆
elif num > self.maxheap[0] and num < self.minheap[0]:
if len(self.maxheap) == len(self.minheap):
self.maxheap.append(num)
# 调整插入后的list, 使其仍为最大堆
for i in range(len(self.maxheap), -1, -1):
self.heapq_max(self.maxheap, len(self.maxheap), i)
else:
# 调整插入后的list, 使其仍为最小堆
self.minheap.append(num)
for i in range(len(self.minheap), -1, -1):
self.heapq_min(self.minheap, len(self.minheap), i)
# 当新进来的数小于最大堆的堆顶,那么一定插入最大堆,根据当前最大堆和最小堆的size决定需不需要弹出最大堆堆顶,并插入最小堆,调整最小堆。
elif num <= self.maxheap[0]:
if len(self.maxheap) == len(self.minheap):
self.maxheap.append(num)
for i in range(len(self.maxheap), -1, -1):
self.heapq_max(self.maxheap, len(self.maxheap), i)
else:
self.minheap.append(self.maxheap[0])
for i in range(len(self.minheap), -1, -1):
self.heapq_min(self.minheap, len(self.minheap), i)
self.maxheap.append(num)
self.maxheap = self.maxheap[1:]
for i in range(len(self.maxheap), -1, -1):
self.heapq_max(self.maxheap, len(self.maxheap), i)
# 当新进来的数大于最小堆的堆顶,那么一定插入最小堆,根据最大堆和最小堆的size决定是否需要弹出最小堆堆顶,插入最大堆,调整最大堆。
elif num >= self.minheap[0]:
if len(self.maxheap) == len(self.minheap):
self.maxheap.append(self.minheap[0])
for i in range(len(self.maxheap), -1, -1):
self.heapq_max(self.maxheap, len(self.maxheap), i)
self.minheap.append(num)
self.minheap = self.minheap[1:]
#print('self.minheap is:')
# print(self.minheap)
for i in range(len(self.minheap), -1, -1):
self.heapq_min(self.minheap, len(self.minheap), i)
else:
self.minheap.append(num)
for i in range(len(self.minheap), -1, -1):
self.heapq_min(self.minheap, len(self.minheap), i)
def GetMedian(self, num):
# write code here
self.Insert(num)
if len(self.maxheap) == len(self.minheap):
# print(self.maxheap)
# print(self.minheap)
# 当最大堆最小堆size相同,和为偶数,则返回两个堆顶平均数
return (self.maxheap[0]+self.minheap[0])/2
else:
# print(self.maxheap)
# print(self.minheap)
# 当最大堆最小堆size和为奇数,则返回最大堆栈顶
return self.maxheap[0]