以下设 n=10000(共 20 个降序数组,每个数组 500 个元素),m=500(要找出的最大元素的个数)。
一般可以想到的有两个方法。
[1] sort. O(nlogn)
[2] 将第一数组建立 min-heap,所有其他数组成员依次插入到 min-heap,每次完成插入后,删除当前最小值,即根元素。所有元素都筛过以后,min-heap 中的元素即为最大的 500 个。O(nlogm).
但是这两个方法都没有用到每个数组已按降序排列的特性。利用这一特性还有第三个方法,可以将算法复杂度降到 O(nlog20)(更精确地说是 O((n-m)log20),因为只需要删除 n-m 个元素,每次删除只调整一个大小不超过 20 的堆)。
方法描述:
1. 用20个数组的最后一个元素,也就是最小的元素组成一个最小堆。
2. 删除最小堆的第一个元素(也就是最小的元素),将相对应的数组中的前一个元素加入最小堆;如果该数组已经没有前一个元素,则只删除、不再加入,堆的大小减一。
3. 重复第二步,直到已经删除了 20*500 - 500 = 9500 个元素。此时各数组中尚未被删除的 500 个元素即为最大的 500 个。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include<time.h>
/* Number of input arrays (rows of the 2-D input table). */
#define ARR_NUM (20)
/* Number of elements stored in each input array (row length). */
#define ARR_SIZE (500)
/*
 * One slot of the min-heap used by FindTop. Each slot tracks a position
 * inside one input row together with the value found at that position.
 */
struct _HeapElem
{
    int arr_index;  /* which input row this slot refers to */
    int arr_pos;    /* index inside that row of the tracked element */
    int value;      /* copy of the tracked element; this is the heap key */
};

typedef struct _HeapElem HeapElem;
/* Exchange the heap entries stored at indices i and j of a. */
static void HeapElemSwap(HeapElem *a, int i, int j)
{
    HeapElem *first = a + i;
    HeapElem *second = a + j;
    HeapElem saved = *first;

    *first = *second;
    *second = saved;
}
/* Exchange the integers stored at indices i and j of a. */
static void ArraySwap(int *a, int i, int j)
{
    int *lhs = &a[i];
    int *rhs = &a[j];
    int held = *lhs;

    *lhs = *rhs;
    *rhs = held;
}
/*
 * Sort the first `size` ints of `a` into descending order in place.
 *
 * Bubble sort with a shrinking bound: after each pass the bound is moved
 * to the index of the last swap, because everything beyond that index is
 * already in its final position. A pass without swaps ends the sort.
 */
static void BubbleSort(int *a, int size)
{
    int bound;
    int last_swap;

    for (bound = size - 1; bound > 0; bound = last_swap) {
        int k = 0;

        last_swap = 0;
        while (k < bound) {
            /* A larger right neighbour must move towards the front. */
            if (a[k + 1] > a[k]) {
                ArraySwap(a, k, k + 1);
                last_swap = k;
            }
            k++;
        }
    }
}
/*
 * Sift the entry at index i down an array-backed binary min-heap.
 *
 * a    - heap storage; the children of index k live at 2k+1 and 2k+2.
 * i    - index of the entry to move down.
 * size - number of valid entries in a.
 *
 * The entry is swapped with its smaller child (compared by .value) until
 * neither child is smaller or it reaches an index without children.
 */
void MinHeapAdjust(HeapElem *a, int i, int size)
{
    /* Only indices below size/2 have at least one child. */
    while (i < size / 2) {
        int smallest = i;
        int lchild = 2 * i + 1;
        int rchild = lchild + 1;

        if (lchild < size && a[lchild].value < a[smallest].value) {
            smallest = lchild;
        }
        if (rchild < size && a[rchild].value < a[smallest].value) {
            smallest = rchild;
        }
        if (smallest == i) {
            break;
        }

        HeapElemSwap(a, i, smallest);
        i = smallest;
    }
}
/*
 * Turn the first `size` entries of `a` into a min-heap keyed on .value by
 * sifting down every index from size/2 - 1 back to 0.
 */
void BuildMinHeap(HeapElem *a, int size)
{
    int node = size / 2;

    /* The post-decrement makes the first visited index size/2 - 1. */
    while (node-- > 0) {
        MinHeapAdjust(a, node, size);
    }
}
/*
 * Compare the first `size` ints of a1 and a2, walking from the last index
 * to the first.
 *
 * Returns 0 at the first mismatch. Otherwise prints "Verify OK !" and
 * returns 1 (this includes size <= 0, where nothing is compared).
 */
int ArrayVerify(int *a1, int *a2, int size)
{
    int k;

    for (k = size - 1; k >= 0; k--) {
        if (a1[k] != a2[k]) {
            return 0;
        }
    }

    printf("Verify OK !\n");
    return 1;
}
/*
 * Print the first `size` ints of `a` on one line of stdout, each followed
 * by ", ", then end the line with a newline.
 */
void ArrayPrint(int *a, int size)
{
    int k = 0;

    while (k < size) {
        printf("%d, ", a[k]);
        k++;
    }
    printf("\n");
}
/*
 * Brute-force reference implementation: copy every row of `a` into one
 * flat buffer, sort it in descending order and return a copy of the first
 * `top_num` values.
 *
 * a        - table of arr_num rows with ARR_SIZE ints each.
 * arr_num  - number of rows in `a`; must be positive.
 * top_num  - how many of the largest values to return; must satisfy
 *            1 <= top_num <= arr_num * ARR_SIZE.
 *            (arr_num * ARR_SIZE is assumed to fit in an int.)
 *
 * Returns a malloc'ed array of top_num ints in descending order, owned by
 * the caller (release with free()). Returns NULL, after printing a message
 * to stdout, when the arguments are out of range or an allocation fails.
 */
int * FindTopForce(int (*a)[ARR_SIZE], int arr_num, int top_num)
{
    int total_count, row, col, idx;
    int *total, *result;

    /*
     * Without this check a too-large top_num makes the memcpy below read
     * past the end of `total`, and a non-positive count turns into a zero
     * or huge allocation size.
     */
    if (a == NULL || arr_num <= 0 || top_num <= 0 ||
        top_num > arr_num * ARR_SIZE) {
        printf("FindTopForce: invalid arguments\n");
        return NULL;
    }
    total_count = arr_num * ARR_SIZE;

    total = malloc((size_t)total_count * sizeof *total);
    if (total == NULL) {
        printf("total malloc failed\n");
        return NULL;
    }

    /* Flatten the table row by row. */
    idx = 0;
    for (row = 0; row < arr_num; row++) {
        for (col = 0; col < ARR_SIZE; col++) {
            total[idx++] = a[row][col];
        }
    }

    /* Descending sort puts the top_num largest values at the front. */
    BubbleSort(total, total_count);

    result = malloc((size_t)top_num * sizeof *result);
    if (result == NULL) {
        free(total);
        printf("result malloc failed\n");
        return NULL;
    }
    memcpy(result, total, (size_t)top_num * sizeof *result);

    free(total);
    return result;
}
/*
 * Select the top_num largest values from a table whose rows are each
 * sorted in descending order, using a min-heap with one slot per row.
 *
 * The heap starts with the last (smallest) element of every row. The heap
 * minimum is then discarded arr_num * ARR_SIZE - top_num times; each time
 * it is replaced by the element just before it in the same row, or the
 * row's slot is dropped from the heap once the row is used up. What is
 * left in each surviving row, indices 0..arr_pos, is the answer.
 *
 * a        - table of arr_num rows with ARR_SIZE ints each; every row must
 *            already be sorted in descending order.
 * arr_num  - number of rows in `a`; must be positive.
 * top_num  - how many of the largest values to return; must satisfy
 *            1 <= top_num <= arr_num * ARR_SIZE.
 *            (arr_num * ARR_SIZE is assumed to fit in an int.)
 *
 * Side effect: the sorted result is printed to stdout via ArrayPrint.
 *
 * Returns a malloc'ed array of top_num ints in descending order, owned by
 * the caller (release with free()). Returns NULL, after printing a message
 * to stdout, when the arguments are out of range or an allocation fails.
 */
int * FindTop(int (*a)[ARR_SIZE], int arr_num, int top_num)
{
    HeapElem *minHeap;
    int *result;
    int heap_size, to_discard, i, idx;

    /*
     * Out-of-range counts would otherwise either leave part of `result`
     * uninitialized before it is sorted (top_num too large) or keep
     * discarding after the heap is empty and index minHeap[-1]
     * (top_num negative).
     */
    if (a == NULL || arr_num <= 0 || top_num <= 0 ||
        top_num > arr_num * ARR_SIZE) {
        printf("FindTop: invalid arguments\n");
        return NULL;
    }
    to_discard = arr_num * ARR_SIZE - top_num;
    heap_size = arr_num;

    minHeap = malloc((size_t)arr_num * sizeof *minHeap);
    if (minHeap == NULL) {
        printf("minHeap malloc failed\n");
        return NULL;
    }

    /* Seed the heap with the smallest (last) element of every row. */
    for (i = 0; i < arr_num; i++) {
        minHeap[i].arr_index = i;
        minHeap[i].arr_pos = ARR_SIZE - 1;
        minHeap[i].value = a[i][ARR_SIZE - 1];
    }
    BuildMinHeap(minHeap, heap_size);

    while (to_discard-- > 0) {
        if (minHeap[0].arr_pos <= 0) {
            /* Row exhausted: move its slot past the end and shrink. */
            HeapElemSwap(minHeap, 0, heap_size - 1);
            heap_size--;
        } else {
            /* Replace the discarded minimum by its predecessor in the row. */
            minHeap[0].arr_pos--;
            minHeap[0].value = a[minHeap[0].arr_index][minHeap[0].arr_pos];
        }
        MinHeapAdjust(minHeap, 0, heap_size);
    }

    result = malloc((size_t)top_num * sizeof *result);
    if (result == NULL) {
        free(minHeap);
        printf("result malloc failed\n");
        return NULL;
    }

    /* Every surviving slot contributes elements 0..arr_pos of its row. */
    idx = 0;
    for (i = 0; i < heap_size; i++) {
        int j;

        for (j = 0; j <= minHeap[i].arr_pos; j++) {
            result[idx++] = a[minHeap[i].arr_index][j];
        }
    }

    /* The survivors come out grouped by row, so order them globally. */
    BubbleSort(result, top_num);
    ArrayPrint(result, top_num);

    free(minHeap);
    return result;
}
/*
 * Demo driver: fill ARR_NUM rows of ARR_SIZE random ints, sort each row in
 * descending order, compute the TOP_NUM largest values with both FindTop
 * and FindTopForce, and compare the two results.
 *
 * Returns EXIT_SUCCESS only when both results were produced and match;
 * otherwise EXIT_FAILURE.
 */
int main(void)
{
    enum { TOP_NUM = 500 };
    int a[ARR_NUM][ARR_SIZE];
    int i, j;
    int verified = 0;
    int *result_1, *result_2;

    srand((unsigned)time(NULL));
    for (i = 0; i < ARR_NUM; i++) {
        for (j = 0; j < ARR_SIZE; j++) {
            a[i][j] = rand();
        }
        /* FindTop needs every row in descending order. */
        BubbleSort(a[i], ARR_SIZE);
    }

    result_1 = FindTop(a, ARR_NUM, TOP_NUM);
    result_2 = FindTopForce(a, ARR_NUM, TOP_NUM);

    /* Either call may return NULL; never hand NULL to ArrayVerify. */
    if (result_1 != NULL && result_2 != NULL) {
        verified = ArrayVerify(result_1, result_2, TOP_NUM);
        if (!verified) {
            printf("Verify FAILED !\n");
        }
    }

    free(result_1);
    free(result_2);
    return verified ? EXIT_SUCCESS : EXIT_FAILURE;
}