参考资料:
http://bbs.csdn.net/topics/250038051
【1】遍历数组,求出最大值,与arr[0]元素交换,再在arr[1]到arr[100000000-1]之间求最大值,与arr[1]交换。。类似选择排序。
#include <iostream>
#include <fstream>
#include <iterator>
#include <sys/time.h>
#include <stdlib.h>
using namespace std;
// Total number of values in the data set.
const int N=100000000; // 100 million
// How many of the largest values the benchmark extracts.
const int M=10000; // top 10 thousand
// Bit mask 0x3FFFFFFF (low 30 bits set); main() ANDs it with rand() when filling arr.
const int x=0x40000000-1;
// The data set itself, defined at namespace scope.
int arr[N];
// Benchmark 1: selection-sort style extraction of the M largest values.
//
// Fills arr with masked pseudo-random numbers, then for each of the first M
// positions scans the remainder of the array for the largest value and swaps
// it into place. Afterwards arr[0..M) holds the M largest values in
// descending order. Prints the elapsed time in milliseconds followed by two
// sample ranges of the result.
int main()
{
    // Generate the random data set.
    for (int i = 0; i < N; i++)
        arr[i] = rand() & x;

    struct timeval starttime, endtime;
    gettimeofday(&starttime, 0);

    // Selection sort limited to the first M slots: each pass records the
    // index of the maximum of arr[i..N) and moves that value to arr[i].
    for (int i = 0; i < M; i++)
    {
        int m = i;
        for (int j = i + 1; j < N; j++)
        {
            if (arr[j] > arr[m])
                m = j;
        }
        if (i != m)
        {
            int t = arr[i];
            arr[i] = arr[m];
            arr[m] = t;
        }
    }

    gettimeofday(&endtime, 0);

    // Do the arithmetic in double. Multiplying the seconds difference by
    // 1000000 in integer arithmetic overflows where the type is 32 bits wide
    // once the run exceeds about 2147 s, which yields a negative duration.
    double timeuse = 1000000.0 * (endtime.tv_sec - starttime.tv_sec)
                   + (endtime.tv_usec - starttime.tv_usec);
    timeuse /= 1000;
    cout << timeuse << " ms" << endl;

    // Print as many values as the label announces, and end the line so the
    // next label does not run into the numbers.
    cout << "0-10" << endl;
    copy(arr, arr + 10, ostream_iterator<int>(cout, " "));
    cout << endl;
    cout << "9990-10000" << endl;
    copy(arr + 9990, arr + 10000, ostream_iterator<int>(cout, " "));
    return 0;
}
运行结果:
chen@chen-book1:~$ time ./count_0
-634229 ms
0-10
1073741822 1073741791 1073741788 1073741787 1073741783 9990-10000
1073634978 1073634973 1073634956 1073634953 1073634935 1073634926 1073634918 1073634907 1073634906 1073634904
real 61m3.238s
user 60m57.797s
sys 0m0.580s
chen@chen-book1:~$
一个小时。。等不住了,吃完饭回来才跑完。(程序打印出的负数毫秒值,是计算微秒差时整数乘法溢出造成的,实际耗时以time给出的约61分钟为准。)
【2】先对前1万个数排序,再遍历剩下的数字,如果比这一万个数字中的最小值大,就用它替换掉最小值,并将其插入到合适位置。
#include <iostream>
#include <fstream>
#include <iterator>
#include <sys/time.h>
#include <string.h>
#include <algorithm>
using namespace std;
// Total number of values in the data set (100 million).
const int N=100000000;
// Size of the sorted window of largest values kept at the front of arr.
const int M=10000;
// Bit mask 0x3FFFFFFF (low 30 bits set); main() ANDs it with rand() when filling arr.
const int x=0x40000000-1;
// The data set itself, defined at namespace scope.
int arr[N];
// Exchanges the values of a and b through a temporary copy
// (safe when both references name the same object).
inline void swap(int &a, int &b)
{
    const int old_b = b;
    b = a;
    a = old_b;
}
// Benchmark 2: keep the M largest values seen so far as an ascending sorted
// window in arr[0..M).
//
// After sorting the first M values, every remaining value that beats the
// window minimum (arr[0]) is swapped into arr[0] and then moved to its sorted
// position by shifting the smaller window entries one slot to the left.
// Prints the elapsed time and two sample ranges of the window.
int main()
{
    // Populate the whole data set with masked pseudo-random values.
    for (int k = 0; k != N; ++k)
        arr[k] = rand() & x;

    struct timeval starttime, endtime;
    gettimeofday(&starttime, 0);

    // arr[0..M) becomes the ascending window; arr[0] is always its minimum.
    sort(arr, arr + M);
    for (int k = M; k < N; ++k)
    {
        // A value that is not greater than the current minimum cannot enter.
        if (!(arr[0] < arr[k]))
            continue;
        swap(arr[0], arr[k]);

        // The newcomer now sits in arr[0]; find the index it must end up at.
        const int incoming = arr[0];
        int slot = 0;
        if (incoming > arr[M - 1])
        {
            // "Optimisation" variant: a new overall maximum goes straight to
            // the end of the window without scanning.
            slot = M - 1;
        }
        else
        {
            // Linear scan; it stops inside the window because
            // arr[M-1] >= incoming on this branch.
            while (incoming > arr[slot + 1])
                ++slot;
            if (slot == 0)
                continue;   // already in place
        }
        // Shift arr[1..slot] down by one and drop the newcomer into the gap.
        memmove(arr, arr + 1, sizeof(int) * slot);
        arr[slot] = incoming;

        // Alternatives measured in the write-up (not active here):
        //  2) "without the optimisation": skip the arr[M-1] test and always
        //     scan with a bounded loop
        //       for (j = 1; j < M && arr[0] > arr[j]; j++);
        //  3) binary search for the insertion point
        //       int *p = lower_bound(arr + 1, arr + M, arr[0]);
        //     then shift arr[1..p) down by one (the regions overlap, so use
        //     memmove rather than memcpy) and store the newcomer at *(p - 1).
    }

    gettimeofday(&endtime, 0);
    // Borrow one second when the microsecond part would come out negative.
    if (endtime.tv_usec < starttime.tv_usec)
    {
        endtime.tv_sec -= 1;
        endtime.tv_usec += 1000000;
    }
    cout << (endtime.tv_sec - starttime.tv_sec) << " s, "
         << (endtime.tv_usec - starttime.tv_usec) << " us" << endl;

    ostream_iterator<int> out(cout, " ");
    cout << "0-10" << endl;
    copy(arr, arr + 10, out);
    cout << endl;
    cout << "90-100" << endl;
    copy(arr + 90, arr + 100, out);
    return 0;
}
运行
chen@chen-book1:~$ time ./count_2
2 s, 106089 us
0-10
1073634904 1073634906 1073634907 1073634918 1073634926 1073634935 1073634953 1073634956 1073634973 1073634978
90-100
1073635879 1073635888 1073635895 1073635899 1073635902 1073635906 1073635909 1073635918 1073635937 1073635945
real 0m4.605s
user 0m4.212s
sys 0m0.352s
运算时间居然已经降到了2.1s!总时间4.6s,其余时间主要花在了生成随机数上。代码里有个优化(先和arr[M-1]比较),但是没有发挥作用,去掉优化后:
chen@chen-book1:~$ time ./count_2
2 s, 40210 us
二分:
chen@chen-book1:~$ time ./count_2
0 s, 578971 us
0-10
1073634904 1073634906 1073634907 1073634918 1073634926 1073634935 1073634953 1073634956 1073634973 1073634978
90-100
1073635879 1073635888 1073635895 1073635899 1073635902 1073635906 1073635909 1073635918 1073635937 1073635945
real 0m3.285s
user 0m2.864s
sys 0m0.388s
优化:2.4s
去掉优化:2.1s
二分查找:579ms
记得当M=100时,二分的效果还比前两者要差,M=10000的时候耗时就已经只有前两者的约1/4了!半秒钟有木有!!
PS:N=4亿时:
优化: 3.5s
去掉优化:3.3s
二分查找:1.85s
【3】堆排序。
建立一个小根堆,然后后面的依次跟堆顶比,如果比堆顶大,那么就跟堆顶换,然后调整堆。
#include <iostream>
#include <fstream>
#include <iterator>
#include <sys/time.h>
#include <string.h>
#include <algorithm>
using namespace std;
// Total number of values in the data set (100 million).
const int N=100000000;
// Size of the heap of largest values kept at the front of arr.
const int M=10000;
// Bit mask 0x3FFFFFFF (low 30 bits set); main() ANDs it with rand() when filling arr.
const int x=0x40000000-1;
// The data set itself, defined at namespace scope.
int arr[N];
// Swaps two ints via a temporary; aliasing arguments are handled correctly.
inline void swap(int &a, int &b)
{
    const int first = a;
    a = b;
    b = first;
}
// "Greater than" comparator; with the STL heap algorithms it produces a
// min-heap (smallest element at the front).
//
// The comparison must be strict (a > b, not a >= b): the standard algorithms
// require a strict weak ordering, i.e. comp(v, v) has to be false. The
// operands are taken by const reference and the call operator is const so the
// functor also works on const data and when the functor itself is const.
struct cmp
{
    bool operator()(const int &a, const int &b) const { return a > b; }
};
// Benchmark 3: keep the M largest values in a min-heap stored in arr[0..M).
//
// The heap top arr[0] is the smallest value currently kept. Every remaining
// value that is larger than the top replaces it; values that are not larger
// are skipped without touching the heap. Finally the heap is sorted so that
// arr[0..M) is in descending order, then the elapsed time and two sample
// ranges are printed.
int main()
{
    for (int i = 0; i < N; i++)
        arr[i] = rand() & x;

    struct timeval starttime, endtime;
    gettimeofday(&starttime, 0);

    make_heap(arr, arr + M, cmp());
    for (int i = M; i < N; i++)
    {
        // Re-heapify only when the top is actually replaced; the vast
        // majority of values are rejected by this single comparison.
        if (arr[i] > arr[0])
        {
            // Standard heap API instead of library-internal helpers:
            // pop_heap parks the current minimum at arr[M-1], the newcomer
            // takes that slot, and push_heap sifts it into position.
            pop_heap(arr, arr + M, cmp());
            swap(arr[i], arr[M - 1]);
            push_heap(arr, arr + M, cmp());
        }
    }
    // Turn the heap into a descending sequence so the printed ranges are
    // ordered rather than in heap layout.
    sort_heap(arr, arr + M, cmp());

    gettimeofday(&endtime, 0);
    // Borrow one second when the microsecond part would come out negative.
    if (endtime.tv_usec < starttime.tv_usec)
    {
        endtime.tv_sec--;
        endtime.tv_usec += 1000000;
    }
    cout << (endtime.tv_sec - starttime.tv_sec) << " s, "
         << (endtime.tv_usec - starttime.tv_usec) << " us" << endl;
    cout << "0-10" << endl;
    copy(arr, arr + 10, ostream_iterator<int>(cout, " "));
    cout << endl;
    cout << "90-100" << endl;
    copy(arr + 90, arr + 100, ostream_iterator<int>(cout, " "));
    return 0;
}
运行:
chen@chen-book1:~$ time ./count_2
34 s, -60310 us
34s!我最爱的堆排序,怎么会这么慢。。。
PS:这次测试的程序貌似有问题:循环里不管有没有和堆顶交换,每次都会调用一次__adjust_heap来调整堆,这很可能就是它这么慢的原因。
【4】当然,还有直接对整个数组排序,然后取前1万个。。。排序用快排
#include <iostream>
#include <fstream>
#include <iterator>
#include <sys/time.h>
#include <string.h>
#include <algorithm>
using namespace std;
// Total number of values in the data set (100 million).
const int N=100000000;
// Number of largest values of interest; this variant sorts all N values.
const int M=10000;
// Bit mask 0x3FFFFFFF (low 30 bits set); main() ANDs it with rand() when filling arr.
const int x=0x40000000-1;
// The data set itself, defined at namespace scope.
int arr[N];
// Exchanges the two referenced ints using a saved copy of the second one.
inline void swap(int &a, int &b)
{
    const int saved = b;
    b = a;
    a = saved;
}
// "Greater than" comparator used to sort in descending order.
//
// The comparison must be strict (a > b, not a >= b): std::sort requires a
// strict weak ordering, i.e. comp(v, v) has to be false; a non-strict
// comparator is undefined behaviour. The call operator is const so the
// functor can be invoked through a const object.
struct cmp
{
    bool operator()(const int &a, const int &b) const { return a > b; }
};
// Benchmark 4: sort the entire data set in descending order and read the
// largest values off the front.
//
// Prints the elapsed time of the sort and two sample ranges of the result.
int main()
{
    for (int i = 0; i < N; i++)
        arr[i] = rand() & x;

    struct timeval starttime, endtime;
    gettimeofday(&starttime, 0);

    sort(arr, arr + N, cmp());

    gettimeofday(&endtime, 0);
    // Borrow one second when the microsecond part would come out negative;
    // without this the report can read e.g. "67 s, -486231 us".
    if (endtime.tv_usec < starttime.tv_usec)
    {
        endtime.tv_sec--;
        endtime.tv_usec += 1000000;
    }
    cout << (endtime.tv_sec - starttime.tv_sec) << " s, "
         << (endtime.tv_usec - starttime.tv_usec) << " us" << endl;
    cout << "0-10" << endl;
    copy(arr, arr + 10, ostream_iterator<int>(cout, " "));
    cout << endl;
    cout << "90-100" << endl;
    copy(arr + 90, arr + 100, ostream_iterator<int>(cout, " "));
    return 0;
}
运行:
chen@chen-book1:~$ time ./count_2
44 s, 747807 us
0-10
1073741822 1073741791 1073741788 1073741787 1073741783 1073741782 1073741777 1073741771 1073741756 1073741754
90-100
1073740859 1073740858 1073740844 1073740841 1073740837 1073740832 1073740826 1073740826 1073740811 1073740789
real 0m47.247s
user 0m46.807s
sys 0m0.340s
先快排再取前10000,用时44s。要是换成堆排序呢?把sort改成sort_heap,运行:
chen@chen-book1:~$ time ./count_2
67 s, -486231 us
0-10
1073741822 1073741791 1073741788 1073741787 1073741783 1073741782 1073741777 1073741771 1073741756 1073741754
90-100
1073740859 1073740858 1073740844 1073740841 1073740837 1073740832 1073740826 1073740826 1073740811 1073740789
real 1m9.013s
user 1m8.540s
sys 0m0.340s
用了约66.5s(输出里的“67 s, -486231 us”是因为微秒差没有做借位,实际是67s减去约0.49s)。快排的确是这几种N logN排序算法里最快的。
【5】部分排序。STL里有,基于堆排序的,看看效果如何!
#include <iostream>
#include <fstream>
#include <iterator>
#include <sys/time.h>
#include <string.h>
#include <algorithm>
using namespace std;
// Total number of values in the data set (100 million).
const int N=100000000;
// Number of largest values that partial_sort places, sorted, at the front of arr.
const int M=10000;
// Bit mask 0x3FFFFFFF (low 30 bits set); main() ANDs it with rand() when filling arr.
const int x=0x40000000-1;
// The data set itself, defined at namespace scope.
int arr[N];
// Swaps the values held by a and b; a temporary keeps the original a.
inline void swap(int &a, int &b)
{
    const int original_a = a;
    a = b;
    b = original_a;
}
// "Greater than" comparator; makes partial_sort deliver the largest values
// first, in descending order.
//
// The comparison must be strict (a > b, not a >= b): the standard algorithms
// require a strict weak ordering, i.e. comp(v, v) has to be false. The call
// operator is const so the functor can be invoked through a const object.
struct cmp
{
    bool operator()(const int &a, const int &b) const { return a > b; }
};
// Benchmark 5: let std::partial_sort place the M largest values, in
// descending order, at the front of arr.
//
// Prints the elapsed time of the call and two sample ranges of the result.
int main()
{
    // Populate the whole data set with masked pseudo-random values.
    for (int k = 0; k != N; ++k)
        arr[k] = rand() & x;

    struct timeval starttime, endtime;
    gettimeofday(&starttime, 0);
    partial_sort(arr, arr + M, arr + N, cmp());
    gettimeofday(&endtime, 0);

    // Borrow one second when the microsecond part would come out negative.
    const bool needs_borrow = endtime.tv_usec < starttime.tv_usec;
    if (needs_borrow)
    {
        endtime.tv_sec -= 1;
        endtime.tv_usec += 1000000;
    }
    cout << (endtime.tv_sec - starttime.tv_sec) << " s, "
         << (endtime.tv_usec - starttime.tv_usec) << " us" << endl;

    ostream_iterator<int> out(cout, " ");
    cout << "0-10" << endl;
    copy(arr, arr + 10, out);
    cout << endl;
    cout << "90-100" << endl;
    copy(arr + 90, arr + 100, out);
    return 0;
}
运行:
chen@chen-book1:~$ time ./count_2
0 s, 729513 us
0-10
1073741822 1073741791 1073741788 1073741787 1073741783 1073741782 1073741777 1073741771 1073741756 1073741754
90-100
1073740859 1073740858 1073740844 1073740841 1073740837 1073740832 1073740826 1073740826 1073740811 1073740789
real 0m3.282s
user 0m2.896s
sys 0m0.312s
用了730ms,很不错了!可是为什么我自己用堆排序来模拟,就要那么久呢!!
PS:当N为4亿时,时间为2.65s。
【6】类似计数排序。按比特位,先申请长度为32的数组,第0个元素表示最高比特位为第0比特的元素个数,第31个元素表示最高比特位为第31比特的元素个数。
遍历数组,得到了直方图。然后从后向前算,就可以知道,最高比特位为多少的整数,是肯定在top10000里的,例如,算得最高比特位为20的整数,肯定在top 10000里,
而最高比特位为19的整数,则有一部分是,有一部分不是。于是,再遍历一遍,将最高比特位为20的放在整个数组开头,再将最高比特位为19的紧随其后,再对最高比特位为19的这部分做部分排序即可!
#include <iostream>
#include <fstream>
#include <iterator>
#include <sys/time.h>
#include <string.h>
#include <algorithm>
using namespace std;
// Total number of values in the data set (100 million).
const int N=100000000;
// Number of largest values to extract.
const int M=10000;
// Bit mask 0x3FFFFFFF (low 30 bits set); main() ANDs it with rand() when filling arr.
const int x=0x40000000-1;
// The data set itself, defined at namespace scope.
int arr[N];
// Exchanges two ints through a temporary (works when a and b alias).
inline void swap(int &a, int &b)
{
    const int held = b;
    b = a;
    a = held;
}
// "Greater than" comparator; makes partial_sort deliver the largest values
// first, in descending order.
//
// The comparison must be strict (a > b, not a >= b): the standard algorithms
// require a strict weak ordering, i.e. comp(v, v) has to be false. The call
// operator is const so the functor can be invoked through a const object.
struct cmp
{
    bool operator()(const int &a, const int &b) const { return a > b; }
};
// Returns the zero-based index of the highest set bit of x.
// For x == 0 the result is 0, the same as for x == 1.
//
// The value is shifted as an unsigned quantity so that a negative argument
// terminates (reporting the sign-bit index) instead of looping forever, as
// it would under an arithmetic right shift of a negative int.
int hbit(int x)
{
    unsigned int v = static_cast<unsigned int>(x);
    int i = 0;
    while (v >>= 1)
        i++;
    return i;
}
// Moves every element of a[0..n) that is >= threshold to the front of the
// array (relative order is not preserved) and returns how many were moved.
// The scan is bounded by n, so it is safe even when every element qualifies.
static int gather_at_least(int *a, int n, int threshold)
{
    int front = 0;
    for (int k = 0; k < n; ++k)
    {
        if (a[k] >= threshold)
        {
            if (k != front)
                swap(a[front], a[k]);
            ++front;
        }
    }
    return front;
}

// Benchmark 6: histogram pre-selection by highest set bit, then partial sort.
//
// 1. Count, per highest-set-bit index, how many values fall in each bucket.
// 2. Walk the buckets from the top until they hold at least M values; every
//    value of the top M lies in those buckets.
// 3. Gather the values of those buckets at the front of arr and partial_sort
//    only that candidate range, leaving the M largest values in descending
//    order in arr[0..M).
// Prints the elapsed time and two sample ranges of the result.
int main()
{
    for (int i = 0; i < N; i++)
        arr[i] = rand() & x;

    int bits[32] = {0};
    struct timeval starttime, endtime;
    gettimeofday(&starttime, 0);

    for (int i = 0; i < N; i++)
        bits[hbit(arr[i])]++;

    // Find the boundary bucket: the highest index such that the buckets from
    // it upwards contain at least M values. The walk stops at bucket 0, so
    // bits[] is never indexed with a negative value.
    int bucket = 32 - 1;
    int above = 0;   // number of values in buckets strictly above `bucket`
    while (bucket > 0 && above + bits[bucket] < M)
    {
        above += bits[bucket];
        --bucket;
    }

    // Values whose highest bit is >= bucket are exactly those >= 2^bucket.
    // Bucket 0 also contains the value 0, hence threshold 0 in that case.
    // The data is non-negative (rand() & x), so bucket 31 is empty and the
    // shift below never reaches the sign bit.
    const int threshold = (bucket > 0) ? (1 << bucket) : 0;

    // One pass gathers all candidates; sorting the whole candidate range
    // (not just the boundary bucket) leaves every one of the top M ordered.
    const int candidates = gather_at_least(arr, N, threshold);
    const int keep = (candidates < M) ? candidates : M;
    partial_sort(arr, arr + keep, arr + candidates, cmp());

    gettimeofday(&endtime, 0);
    // Borrow one second when the microsecond part would come out negative.
    if (endtime.tv_usec < starttime.tv_usec)
    {
        endtime.tv_sec--;
        endtime.tv_usec += 1000000;
    }
    cout << (endtime.tv_sec - starttime.tv_sec) << " s, "
         << (endtime.tv_usec - starttime.tv_usec) << " us" << endl;
    cout << "0-10" << endl;
    copy(arr, arr + 10, ostream_iterator<int>(cout, " "));
    cout << endl;
    cout << "90-100" << endl;
    copy(arr + 90, arr + 100, ostream_iterator<int>(cout, " "));
    return 0;
}
运行:
chen@chen-book1:~$ time ./count_2
13 s, 863604 us
0-10
1073741822 1073741791 1073741788 1073741787 1073741783 1073741782 1073741777 1073741771 1073741756 1073741754
90-100
1073740859 1073740858 1073740844 1073740841 1073740837 1073740832 1073740826 1073740826 1073740811 1073740789
real 0m16.352s
user 0m15.865s
sys 0m0.420s
13秒。怎么会这么久。。可能是移动的地方太多了。。
PS:N=4亿时,时间为55s。
【7】nth_element(arr,arr+M,arr+N, greater<int>() );
耗时:2.9s。
综上,第一名:对1万个元素的小数组维持有序(用二分查找确定插入位置),579ms;
第二名:partial_sort based on heap