// 方法1:O(n)。类似于快速排序的划分:基于数组的第k个数字来调整,使得比第k个数字小的数字都位于它的左边,比它大的数字都位于它的右边。这样调整之后,位于数组中左边的k个数字就是最小的k个数字(这k个数字不一定是排序的)。
// 方法2:O(nlog(k)),适合海量数据。先保存k个数,然后拿这k个数中的最大值与数组中下一个值比较,若最大值大于数组中下一个值,则删除该最大值并插入数组中的这个值。
// 因为要删除最大元素,首先想到用最大堆;但是还需要插入操作,这两种操作可以用最大堆或红黑树实现。
// STL中的set和multiset都是基于红黑树实现的,此处基于STL中的multiset实现。
#include<algorithm>
#include<cstdlib>
#include<functional>
#include<iostream>
#include<set>
#include<stdexcept>
#include<utility>
#include<vector>
using namespace std;
typedef multiset<int,greater<int> > intSet;
typedef multiset<int,greater<int> >::iterator setIterator;
/**
 * Partitions the inclusive range a[left..right] around a randomly chosen pivot.
 *
 * After the call, every element left of the returned index is strictly less
 * than the pivot and every element right of it is greater than or equal to it.
 *
 * @param a     Array to rearrange in place; must not be NULL.
 * @param left  Index of the first element of the range; must be >= 0.
 * @param right Index of the last element of the range; must be >= left.
 *              The caller is responsible for it being inside the array.
 * @return Final index of the pivot element.
 * @throws std::invalid_argument if a is NULL, left < 0 or right < left.
 */
int Partition(int *a,int left,int right)
{
    if(a==NULL || left<0 || right<left)
        throw std::invalid_argument("invalid parameters");  // thrown by value, nothing to leak

    // Park a random pivot at the right end so the scan below never moves it.
    const int pivotIndex = left + std::rand() % (right - left + 1);
    std::swap(a[pivotIndex], a[right]);
    const int pivot = a[right];

    // Invariant: a[left..boundary-1] holds the elements found to be < pivot.
    int boundary = left;
    for(int j = left; j < right; ++j)
    {
        if(a[j] < pivot)
        {
            if(boundary != j)
                std::swap(a[boundary], a[j]);
            ++boundary;
        }
    }

    // Drop the pivot between the "smaller" and the "not smaller" parts.
    std::swap(a[boundary], a[right]);
    return boundary;
}
/**
 * Method 1: selects the k smallest values by repeatedly partitioning the
 * array until a pivot settles at index k-1.
 *
 * The input array is reordered in place. The k values written to result are
 * the k smallest, in no particular order. The call does nothing when any
 * argument is invalid (NULL pointer, length <= 0, k <= 0 or k > length).
 *
 * @param numbers Input array; reordered by this call.
 * @param length  Number of elements in numbers.
 * @param result  Output buffer with room for at least k ints.
 * @param k       How many of the smallest values to extract.
 */
void GetLeastNumbers1(int *numbers,int length,int *result ,int k) // method 1
{
    // result is written below, so it is validated together with the input.
    if(numbers==NULL || result==NULL || length<=0 || k<=0 || length<k)
        return ;

    const int target = k - 1;
    int start = 0;
    int end = length - 1;
    int index = Partition(numbers, start, end);
    while(index != target)
    {
        // Narrow the search to the side that still contains position k-1.
        if(index > target)
            end = index - 1;
        else
            start = index + 1;
        index = Partition(numbers, start, end);
    }

    // numbers[0..k-1] now holds the k smallest values.
    for(int i = 0; i < k; ++i)
        result[i] = numbers[i];
}
// Multiset ordered with greater<int>, so begin() is the largest stored value.
typedef multiset<int,greater<int> > intSet;
typedef multiset<int,greater<int> >::iterator setIterator;

/**
 * Method 2: collects the k smallest values of data into leastNumbers.
 *
 * Keeps at most k values in a descending multiset; each further input value
 * replaces the current maximum when it is smaller. The input is not modified.
 *
 * @param data         Values to scan.
 * @param leastNumbers Output set. Any previous contents are discarded; it is
 *                     left empty when k < 1 or k > data.size().
 * @param k            How many of the smallest values to keep.
 */
void GetLeastNumbers2(const vector<int> & data , intSet & leastNumbers, int k) // method 2
{
    // Start from a clean set so stale values cannot end up in the result.
    leastNumbers.clear();

    // Check the sign of k first; only then is the unsigned conversion safe.
    if(k<1 || data.size() < static_cast<vector<int>::size_type>(k))
        return ;

    const intSet::size_type capacity = static_cast<intSet::size_type>(k);
    for(vector<int>::const_iterator iter = data.begin(); iter != data.end(); ++iter)
    {
        if(leastNumbers.size() < capacity)
        {
            leastNumbers.insert(*iter);
            continue;
        }

        // Set is full: swap out the current maximum if this value is smaller.
        setIterator iterGreatest = leastNumbers.begin();
        if(*iter < *iterGreatest)
        {
            leastNumbers.erase(iterGreatest);
            leastNumbers.insert(*iter);
        }
    }
}
// ==================== Test code ====================
/**
 * Runs both solutions on one input and prints the expected and actual results.
 *
 * @param testName       Label printed first; skipped when NULL.
 * @param data           Input array of n ints (may be NULL when n is 0).
 *                       Solution 1 reorders it.
 * @param n              Number of elements in data.
 * @param expectedResult The k expected values, or NULL when the input is
 *                       invalid and no result is expected.
 * @param k              How many of the smallest values to request.
 */
void Test(const char* testName, int* data, int n, int* expectedResult, int k)
{
    if(testName != NULL)
        cout<<testName<<endl;

    // Snapshot the input for solution 2 before solution 1 reorders it.
    vector<int> vectorData;
    for(int i = 0; i < n; ++ i)
        vectorData.push_back(data[i]);

    if(expectedResult == NULL)
        cout<<"The input is invalid, we don't expect any result."<<endl;
    else
    {
        cout<<"Expected result: "<<endl;
        for(int i = 0; i < k; ++ i)
            cout<< expectedResult[i]<<"\t";
        cout<<endl;
    }

    cout<<"Result for solution1:"<<endl;
    // A vector owns the buffer, so it is released even if solution 1 throws,
    // and a non-positive k simply yields an empty buffer.
    vector<int> output(k > 0 ? k : 0);
    int* outputBuffer = output.empty() ? NULL : &output[0];
    GetLeastNumbers1(data, n, outputBuffer, k);
    if(expectedResult != NULL)
    {
        for(int i = 0; i < k; ++ i)
            cout<<output[i]<<"\t";
        cout<<endl;
    }

    cout<<"Result for solution2:"<<endl;
    intSet leastNumbers;
    GetLeastNumbers2(vectorData, leastNumbers, k);
    for(setIterator iter = leastNumbers.begin(); iter != leastNumbers.end(); ++iter)
        cout<< *iter<<"\t";
    cout<<endl<<endl;
}
// Case: k is smaller than the array length.
void Test1()
{
    int numbers[] = {4, 5, 1, 6, 2, 7, 3, 8};
    int smallest[] = {1, 2, 3, 4};
    const int numberCount = sizeof(numbers) / sizeof(numbers[0]);
    const int k = sizeof(smallest) / sizeof(smallest[0]);
    Test("Test1", numbers, numberCount, smallest, k);
}
// Case: k equals the array length.
void Test2()
{
    int numbers[] = {4, 5, 1, 6, 2, 7, 3, 8};
    int smallest[] = {1, 2, 3, 4, 5, 6, 7, 8};
    const int numberCount = sizeof(numbers) / sizeof(numbers[0]);
    const int k = sizeof(smallest) / sizeof(smallest[0]);
    Test("Test2", numbers, numberCount, smallest, k);
}
// Case: k is larger than the array length, so no result is expected.
void Test3()
{
    int numbers[] = {4, 5, 1, 6, 2, 7, 3, 8};
    const int numberCount = sizeof(numbers) / sizeof(numbers[0]);
    int* const noExpectation = NULL;
    Test("Test3", numbers, numberCount, noExpectation, 10);
}
// Case: k equals 1.
void Test4()
{
    int numbers[] = {4, 5, 1, 6, 2, 7, 3, 8};
    int smallest[] = {1};
    const int numberCount = sizeof(numbers) / sizeof(numbers[0]);
    const int k = sizeof(smallest) / sizeof(smallest[0]);
    Test("Test4", numbers, numberCount, smallest, k);
}
// Case: k equals 0, so no result is expected.
void Test5()
{
    int numbers[] = {4, 5, 1, 6, 2, 7, 3, 8};
    const int numberCount = sizeof(numbers) / sizeof(numbers[0]);
    int* const noExpectation = NULL;
    Test("Test5", numbers, numberCount, noExpectation, 0);
}
// Case: the array contains duplicate values.
void Test6()
{
    int numbers[] = {4, 5, 1, 6, 2, 7, 2, 8};
    int smallest[] = {1, 2};
    const int numberCount = sizeof(numbers) / sizeof(numbers[0]);
    const int k = sizeof(smallest) / sizeof(smallest[0]);
    Test("Test6", numbers, numberCount, smallest, k);
}
// Case: the input array is a null pointer, so no result is expected.
void Test7()
{
    int* expected = NULL;
    // The element count is an int, so pass 0 rather than the pointer constant NULL.
    Test("Test7", NULL, 0, expected, 0);
}
// Entry point: runs every test case in order. The command-line arguments are unused.
int main(int argc,char* argv[])
{
    typedef void (*TestCase)();
    const TestCase cases[] = {Test1, Test2, Test3, Test4, Test5, Test6, Test7};
    const int caseCount = sizeof(cases) / sizeof(cases[0]);

    for(int i = 0; i < caseCount; ++i)
        cases[i]();

    return 0;
}
// Descending-order multiset of ints (begin() is the largest element) and its iterator type.
typedef multiset< int, greater<int> > intSet;
typedef intSet::iterator setIterator;