海量数据求最大K个数
数据量太大时,无法把全部数据一次性放入内存建堆。可以先用前K个数建一个小堆,然后将后面的数依次与堆顶比较:如果比堆顶大,就用它替换堆顶,再向下调整以恢复堆的性质。这样,堆中始终保存着已比较过的所有数中最大的K个数。
函数实现
/* Number of elements in the fixed-size storage array of PriorityQueue. */
#define N 1000
/* Element type stored in the heaps and in the priority queue. */
typedef int DataType;
/*
 * Sift the element at index `root` down a min-heap stored in a[0..n).
 *
 * The element is repeatedly swapped with its smaller child until it is
 * no larger than both children or it reaches a leaf.
 *
 * a    - heap storage, must not be NULL
 * n    - number of elements that belong to the heap
 * root - index of the element to sift down; a negative index or an index
 *        without children inside [0, n) leaves the array untouched
 */
void AdjustDown(DataType* a, size_t n, int root) // min-heap
{
    assert(a);
    if (root < 0)
    {
        return;
    }

    size_t parent = (size_t)root;
    size_t child = parent * 2 + 1;
    while (child < n)
    {
        /* Check that the right child exists BEFORE reading it; reading
         * a[child + 1] first would access one element past the heap. */
        if (child + 1 < n && a[child] > a[child + 1])
        {
            child++;
        }
        if (a[parent] > a[child])
        {
            DataType tmp = a[parent];
            a[parent] = a[child];
            a[child] = tmp;
            parent = child;
            child = parent * 2 + 1;
        }
        else
        {
            break;
        }
    }
}
/*
 * Rearrange a[0..n) in place into a min-heap.
 *
 * Every node that has at least one child is sifted down, starting from the
 * last such node, (n - 2) / 2, and finishing with the root (index 0).
 *
 * a - array to heapify, must not be NULL
 * n - number of elements; arrays with fewer than two elements are already
 *     heaps and are left untouched
 */
void MakeHeap(DataType* a, size_t n)
{
    assert(a);
    if (n < 2)
    {
        /* Also avoids the size_t underflow of (n - 2). */
        return;
    }

    /* `i-- > 0` visits indices (n - 2) / 2 down to 0 inclusive, so the root
     * is sifted as well. */
    for (size_t i = (n - 2) / 2 + 1; i-- > 0; )
    {
        AdjustDown(a, n, (int)i);
    }
}
/*
 * Sift the element at index `child` up a min-heap stored in `a`.
 *
 * While the element is smaller than its parent the two are swapped; the
 * walk stops at the root or as soon as the parent is not larger.
 *
 * a     - heap storage
 * n     - element count (not used by this routine)
 * child - index of the element to sift up
 */
void AdjustUp(DataType* a, size_t n, int child) // min-heap
{
    while (child > 0) {
        int up = (child - 1) / 2;   /* parent index of `child` */

        if (!(a[up] > a[child])) {
            break;                  /* heap order already holds */
        }

        DataType held = a[up];
        a[up] = a[child];
        a[child] = held;
        child = up;
    }
}
// topk 最大的前K
/*
 * Find the k largest values of a[0..n) and print them.
 *
 * The first k elements are turned into a min-heap. Each remaining element
 * that is larger than the heap top is swapped with the top and sifted
 * down, so a[0..k) always holds the k largest values seen so far.
 * The array is reordered in place.
 *
 * Output: the k selected values in heap order (not sorted), each followed
 * by a space, then a newline.
 *
 * a - input array, must not be NULL
 * n - number of elements in a
 * k - how many of the largest values to select; values above n are
 *     clamped to n
 */
void TopK(DataType* a, size_t n, size_t k)
{
    assert(a);
    if (k > n)
    {
        k = n;
    }

    /* Zero or one element is already a heap. */
    if (k > 1)
    {
        MakeHeap(a, k);
    }

    if (k > 0)
    {
        for (size_t i = k; i < n; i++)
        {
            if (a[0] < a[i])
            {
                DataType tmp = a[0];
                a[0] = a[i];
                a[i] = tmp;
                AdjustDown(a, k, 0);
            }
        }
    }

    /* Print exactly k values rather than a fixed count. */
    for (size_t i = 0; i < k; i++)
    {
        printf("%d ", a[i]);
    }
    printf("\n");
}
测试用例
/*
 * Manual test for MakeHeap and TopK.
 *
 * Heapifies a small fixed array, then fills a 1000-element array with
 * rand() % 10000 values, plants five values above that range at fixed
 * positions and asks TopK for the five largest.
 */
void TestHeap()
{
    DataType a[] = { 10, 11, 13, 12, 16, 18, 15, 17, 14, 19 };
    MakeHeap(a, sizeof(a) / sizeof(a[0]));

    DataType NArray[1000];
    const size_t total = sizeof(NArray) / sizeof(NArray[0]);

    srand(time(0));
    for (size_t idx = 0; idx < total; ++idx) {
        NArray[idx] = rand() % 10000;
    }

    /* Known maxima: every value here exceeds anything rand() % 10000 yields. */
    static const struct { size_t pos; DataType val; } planted[] = {
        { 30, 10001 }, { 350, 10002 }, { 999, 10003 },
        { 158, 10004 }, { 334, 10005 },
    };
    for (size_t p = 0; p < sizeof(planted) / sizeof(planted[0]); ++p) {
        NArray[planted[p].pos] = planted[p].val;
    }

    TopK(NArray, total, 5);
}
优先级队列
入队时元素可以按任意顺序插入;出队时总是优先级最高的元素先出(本实现基于小堆,值最小的元素优先级最高)。
接口实现
/*
 * Fixed-capacity priority queue backed by an array.
 * The queue functions in this file maintain _a[0.._size) as a min-heap.
 */
typedef struct PriorityQueue
{
/* Element storage; room for N elements. */
DataType _a[N];
/* Number of elements currently stored. */
size_t _size;
/* Alternative dynamically sized layout, currently disabled: */
//DataType* _a;
//size_t _size;
//size_t _capacity;
}PriorityQueue;
/*
 * Initialise a priority queue to the empty state.
 *
 * All storage is zeroed and the element count is reset to 0.
 *
 * q - queue to initialise, must not be NULL
 */
void PriorityQueueInit(PriorityQueue* q)
{
    assert(q);
    /* memset counts bytes, so pass the size of the whole array rather than
     * its element count. */
    memset(q->_a, 0, sizeof q->_a);
    q->_size = 0;
}
/*
 * Insert x into the priority queue.
 *
 * The value is appended after the last element and sifted up to restore
 * the min-heap order.
 *
 * Pushing onto a full queue is a caller error: it trips an assertion in
 * debug builds; when assertions are disabled the value is dropped and the
 * queue is left unchanged, so memory past the storage array is never
 * written.
 *
 * q - queue to modify, must not be NULL
 * x - value to insert
 */
void PriorityQueuePush(PriorityQueue* q, DataType x)
{
    assert(q);

    const size_t capacity = sizeof q->_a / sizeof q->_a[0];
    assert(q->_size < capacity);
    if (q->_size >= capacity)
    {
        return;
    }

    /* Sifting up from index 0 is a no-op, so the first element needs no
     * special case. */
    q->_a[q->_size] = x;
    q->_size++;
    AdjustUp(q->_a, q->_size, (int)(q->_size - 1));
}
/*
 * Remove the top (index 0) element of the priority queue.
 *
 * The last element is moved to the top, the count is decremented and the
 * moved element is sifted down. Popping an empty queue does nothing.
 *
 * q - queue to modify, must not be NULL
 */
void PriorityQueuePop(PriorityQueue* q)
{
    assert(q);
    if (q->_size == 0) {
        return;
    }

    size_t last = q->_size - 1;
    q->_a[0] = q->_a[last];
    q->_size = last;
    AdjustDown(q->_a, last, 0);
}
/*
 * Return the top (index 0) element of the priority queue without removing it.
 *
 * q - queue to inspect, must not be NULL
 *
 * Returns the top element. For an empty queue it returns 0 so that every
 * path yields a defined value; 0 can also be a stored value, so callers
 * should check the size before relying on the result.
 */
DataType PriorityQueueTop(PriorityQueue* q)
{
    assert(q);
    if (q->_size == 0)
    {
        return (DataType)0;
    }
    return q->_a[0];
}
/*
 * Report how many elements the priority queue currently holds.
 *
 * q - queue to inspect, must not be NULL
 */
size_t PriorityQueueSize(PriorityQueue* q)
{
    assert(q);

    size_t count = q->_size;
    return count;
}
/*
 * Return the element count of the priority queue.
 *
 * NOTE: despite the name, the result is 0 when the queue IS empty and
 * non-zero when it holds elements.
 *
 * q - queue to inspect, must not be NULL
 */
size_t PriorityQueueEmpty(PriorityQueue* q)
{
    assert(q);

    size_t stored = q->_size;
    return stored;
}
/*
 * Sort a[0..n) in place by repeatedly extracting the heap top.
 *
 * Precondition: a[0..n) must already be a min-heap; this function does not
 * build the heap itself. Each pass swaps the current top with the last
 * element of the shrinking heap and sifts the new top down, so the array
 * ends up in descending order.
 *
 * a - heap-ordered array, must not be NULL
 * n - number of elements; 0 and 1 are handled as already sorted
 */
void HeapSort(DataType* a, size_t n)
{
    assert(a);

    /* `end` is the current heap size. Stopping at end > 1 avoids the
     * size_t underflow that `n - 1` causes when n is 0. */
    for (size_t end = n; end > 1; end--)
    {
        DataType tmp = a[0];
        a[0] = a[end - 1];
        a[end - 1] = tmp;
        AdjustDown(a, end - 1, 0);
    }
}
测试用例
void TestPriorityQueue()
{
PriorityQueue q;
PriorityQueueInit(&q);
PriorityQueuePush(&q, 5);
PriorityQueuePush(&q, 2);
PriorityQueuePush(&q, 3);
PriorityQueuePush(&q, 7);
PriorityQueuePush(&q, 6);
PriorityQueuePush(&q, 1);
PriorityQueuePush(&q, 4);
HeapSort(q._a, q._size);
for (size_t i = 0; i < q._size; i++)
{
printf("%d ", q._a[i]);
}
printf("\n");
//while (PriorityQueueEmpty(&q) != 0)
//{
// printf("%d ", PriorityQueueTop(&q));
// PriorityQueuePop(&q);
//}
//printf("\n");
}