1亿个整数求top 10000

最新推荐文章于 2019-06-08 09:12:09 发布

纯属虚构

最新推荐文章于 2019-06-08 09:12:09 发布

阅读量2.1k

点赞数

分类专栏： STL

本文链接：https://blog.csdn.net/fall221/article/details/9140055

版权

STL 专栏收录该内容

6 篇文章 0 订阅

订阅专栏

参考资料：

http://bbs.csdn.net/topics/250038051

【1】遍历数组，求出最大值，与arr[0]元素交换，再在arr[1]到arr[100000000-1]之间求最大值，与arr[1]交换。。类似选择排序。

#include <iostream>
#include <fstream>
#include <iterator>
#include <sys/time.h>
#include <stdlib.h>
using namespace std;

// Problem size: N input values, of which the M largest are wanted.
const int N = 100 * 1000 * 1000;   // one hundred million
const int M = 10 * 1000;           // top ten thousand
// Mask applied to every rand() result; keeps the low 30 bits.
const int x = (1 << 30) - 1;
// Input buffer, statically allocated.
int arr[N];
// Benchmark 1: selection-style extraction of the M largest values.
// Fills arr with N pseudo-random values masked by x, runs the first M rounds
// of a descending selection sort so that arr[0..M) holds the M largest values
// in descending order, then prints the elapsed time and two sample ranges.
int main()
{
    // Generate the random input.
    int i=0;
    for(i=0; i<N;i++)
        arr[i]=rand() & x;

    struct timeval starttime,endtime;
    gettimeofday(&starttime,0);

    // Selection sort, stopped once the first M positions are final.
    for(i=0;i<M; i++)
    {
        int m=i;   // index of the largest value found so far in arr[i..N)
        for(int j=i+1; j<N; j++)
        {
            if(arr[j]>arr[m])
                m=j;
        }
        if(i!=m)
        {
            int t=arr[i];
            arr[i]=arr[m];
            arr[m]=t;
        }
    }

    gettimeofday(&endtime,0);
    // Convert to milliseconds in floating point. Multiplying the second count
    // by 1000000 in integer arithmetic overflows on platforms where tv_sec is
    // 32 bits wide once the run exceeds roughly 35 minutes.
    double timeuse = 1000.0*(endtime.tv_sec - starttime.tv_sec)
                   + (endtime.tv_usec - starttime.tv_usec)/1000.0;
    cout<<timeuse<<" ms"<<endl;

    // Print as many values as the labels announce, one row per label.
    cout<<"0-10"<<endl;
    copy(arr,arr+10, ostream_iterator<int>(cout, " ") );
    cout<<endl;
    cout<<"9990-10000"<<endl;
    copy(arr+9990,arr+10000, ostream_iterator<int>(cout, " ") );
    cout<<endl;

    return 0;

}

运行结果：

chen@chen-book1:~$ time ./count_0
-634229 ms
0-10
1073741822 1073741791 1073741788 1073741787 1073741783 9990-10000
1073634978 1073634973 1073634956 1073634953 1073634935 1073634926 1073634918 1073634907 1073634906 1073634904 
real	61m3.238s
user	60m57.797s
sys	0m0.580s
chen@chen-book1:~$

一个小时。。等不住了吃完饭回来才跑完。

【2】先对前1万个数排序（升序），再遍历剩下的数字：如果某个数比这一万个数中的最小值大，就用它替换最小值，并把它插入到合适的位置。

#include <iostream>
#include <fstream>
#include <iterator>
#include <sys/time.h>
#include <string.h>
#include <algorithm>
using namespace std;

// Problem size: N input values, of which the M largest are wanted.
const int N = 100 * 1000 * 1000;
const int M = 10 * 1000;
// Mask applied to every rand() result; keeps the low 30 bits.
const int x = (1 << 30) - 1;
// Input buffer, statically allocated.
int arr[N];
// Exchanges the values held by a and b.
inline void swap(int &a, int &b)
{
    const int first = a, second = b;
    a = second;
    b = first;
}
int main()
{
    int i=0;
    for(i=0; i<N;i++)  
        arr[i]=rand() & x;
              
    struct timeval starttime,endtime;
    gettimeofday(&starttime,0);
    //
    sort(arr, arr+M);
    for(i=M; i<N; i++)
    {
        if(arr[0] < arr[i])
            swap(arr[0], arr[i]);
        else
            continue;
            
        // dirty
        // 1优化
        if(arr[0] > arr[M-1] )   //优化
        {
            int t=arr[0];
            memmove(arr, arr+1, sizeof(int)*(M-1) );
            arr[M-1] = t;
        }else
        {
            int j;
            for(j=1; arr[0]>arr[j]; j++);
            if(j==1) continue;
            int t=arr[0];
            memmove(arr, arr+1, sizeof(int)* (j-1) );
            arr[j-1]=t;
        }
        /* 2 去掉优化
        {
            int j;
            for(j=1; j<M && arr[0]>arr[j]; j++);
            if(j==1) continue;
            int t=arr[0];
            memmove(arr, arr+1, sizeof(int)* (j-1) );
            arr[j-1]=t;
            
        }*/ // 
            /* 3 二分查找
            int *p=lower_bound(arr+1, arr+M, arr[0]);
            if(p!=arr)
            {
                int t=arr[0];
                memcpy(arr,arr+1,sizeof(int) * (p-arr-1) );
                *(p-1)=t;
            }*/


        
    }
    //
    gettimeofday(&endtime,0);
    if(endtime.tv_usec<starttime.tv_usec)
    {
        endtime.tv_sec--;
        endtime.tv_usec+=1000000;
    }
    cout<<(endtime.tv_sec-starttime.tv_sec)<<" s,  "<<(endtime.tv_usec-starttime.tv_usec)<<" us"<<endl;
    
    cout<<"0-10"<<endl;
    copy(arr,arr+10, ostream_iterator<int>(cout, " ") );
    cout<<endl;
    cout<<"90-100"<<endl;
    copy(arr+90,arr+100, ostream_iterator<int>(cout, " ") );
    
    return 0;
    
}

运行

chen@chen-book1:~$ time ./count_2
2 s,  106089 us
0-10
1073634904 1073634906 1073634907 1073634918 1073634926 1073634935 1073634953 1073634956 1073634973 1073634978 
90-100
1073635879 1073635888 1073635895 1073635899 1073635902 1073635906 1073635909 1073635918 1073635937 1073635945 
real    0m4.605s
user    0m4.212s
sys    0m0.352s

运算时间居然只有2.1s！总时间4.6s，其余时间主要花在了生成随机数上。这里有个优化但是没有发挥作用，去掉优化：

chen@chen-book1:~$ time ./count_2
2 s,  40210 us

二分：

chen@chen-book1:~$ time ./count_2
0 s,  578971 us
0-10
1073634904 1073634906 1073634907 1073634918 1073634926 1073634935 1073634953 1073634956 1073634973 1073634978
90-100
1073635879 1073635888 1073635895 1073635899 1073635902 1073635906 1073635909 1073635918 1073635937 1073635945
real    0m3.285s
user    0m2.864s
sys    0m0.388s

优化：2.4s

去掉优化：2.1s

二分查找：579ms

记得当M=100时，二分的效果还比前两者要差，M=10000的时候就已经是前者的1/4了！半秒钟有木有！！

PS：N=4亿时：

优化: 3.5s

去掉优化：3.3s

二分查找：1.85s

【3】堆排序。

建立一个小根堆，然后后面的依次跟堆顶比，如果比堆顶大，那么就跟堆顶换，然后调整堆。

#include <iostream>
#include <fstream>
#include <iterator>
#include <sys/time.h>
#include <string.h>
#include <algorithm>
using namespace std;

// Problem size: N input values, of which the M largest are wanted.
const int N = 100 * 1000 * 1000;
const int M = 10 * 1000;
// Mask applied to every rand() result; keeps the low 30 bits.
const int x = (1 << 30) - 1;
// Input buffer, statically allocated.
int arr[N];

// Exchanges the values held by a and b.
inline void swap(int &a, int &b)
{
    const int first = a, second = b;
    a = second;
    b = first;
}
// Comparator ordering ints from largest to smallest.
// The comparison is strict (a > b): the standard heap and sort algorithms
// require a strict weak ordering, so cmp()(v, v) must be false.
// Used with make_heap it yields a min-heap (smallest value at the front).
struct cmp
{
   bool operator()(const int &a, const int &b) const {return a>b;}
};
// Benchmark 3: min-heap of the M largest values seen so far.
// arr[0..M) is kept as a heap under cmp, so arr[0] is the smallest value
// kept. Every later element larger than arr[0] replaces that minimum. Only
// then is the heap repaired; elements that do not beat the minimum cost a
// single comparison. Finally the heap is sorted so arr[0..M) holds the M
// largest values in descending order.
int main()
{

    int i=0;
    for(i=0; i<N;i++)
        arr[i]=rand() & x;

    struct timeval starttime,endtime;
    gettimeofday(&starttime,0);

    make_heap(arr, arr+M, cmp());
    for(i=M; i<N; i++)
    {
        if(arr[i] <= arr[0])
            continue;
        // Replace the current minimum using only the public heap API:
        // pop_heap moves the minimum to arr[M-1], the candidate takes its
        // place, and push_heap restores the heap property.
        pop_heap(arr, arr+M, cmp());
        swap(arr[M-1], arr[i]);
        push_heap(arr, arr+M, cmp());
    }
    // Turn the heap into a range sorted by cmp (largest value first).
    sort_heap(arr, arr+M, cmp());

    gettimeofday(&endtime,0);
    // Borrow one second so the microsecond difference is never negative.
    if(endtime.tv_usec<starttime.tv_usec)
    {
        endtime.tv_sec--;
        endtime.tv_usec+=1000000;
    }
    cout<<(endtime.tv_sec-starttime.tv_sec)<<" s,  "<<(endtime.tv_usec-starttime.tv_usec)<<" us"<<endl;

    cout<<"0-10"<<endl;
    copy(arr,arr+10, ostream_iterator<int>(cout, " ") );
    cout<<endl;
    cout<<"90-100"<<endl;
    copy(arr+90,arr+100, ostream_iterator<int>(cout, " ") );


/*
Small-scale sanity check of the same procedure (top 5 of 10 values):

int a[10]={3,36,12,13,6,78,34,2,5,7};
copy(a,a+10, ostream_iterator<int>(cout, " ") );
cout<<endl;
make_heap(a,a+5, cmp());
copy(a,a+10, ostream_iterator<int>(cout, " ") );
cout<<endl;
for(i=5;i<10;i++)
{
    if(a[i]<=a[0])
        continue;
    pop_heap(a,a+5, cmp());
    swap(a[4],a[i]);
    push_heap(a,a+5, cmp());
}
copy(a,a+10, ostream_iterator<int>(cout, " ") );
cout<<endl;
*/
    return 0;

}

运行：

chen@chen-book1:~$ time ./count_2
34 s,  -60310 us

34s！我最爱的堆排序，怎么会这么慢。。。

PS：程序貌似有问题

【4】当然，还有直接对整个数组排序，然后取前1万个。。。排序用快排

#include <iostream>
#include <fstream>
#include <iterator>
#include <sys/time.h>
#include <string.h>
#include <algorithm>
using namespace std;

// Problem size: N input values, of which the M largest are wanted.
const int N = 100 * 1000 * 1000;
const int M = 10 * 1000;
// Mask applied to every rand() result; keeps the low 30 bits.
const int x = (1 << 30) - 1;
// Input buffer, statically allocated.
int arr[N];

// Exchanges the values held by a and b.
inline void swap(int &a, int &b)
{
    const int first = a, second = b;
    a = second;
    b = first;
}
// Comparator ordering ints from largest to smallest.
// The comparison is strict (a > b): std::sort requires a strict weak
// ordering, so cmp()(v, v) must be false.
struct cmp
{
   bool operator()(const int &a, const int &b) const {return a>b;}
};
// Benchmark 4: sort the whole array with cmp and read the front.
// Fills arr with N pseudo-random values masked by x, sorts all of it using
// cmp, and prints the elapsed time followed by two sample ranges.
int main()
{

    int i=0;
    for(i=0; i<N;i++)
        arr[i]=rand() & x;

    struct timeval starttime,endtime;
    gettimeofday(&starttime,0);

    //

    sort(arr, arr+N, cmp() );
    //
    gettimeofday(&endtime,0);
    // Borrow one second so the microsecond difference is never negative.
    if(endtime.tv_usec<starttime.tv_usec)
    {
        endtime.tv_sec--;
        endtime.tv_usec+=1000000;
    }
    cout<<(endtime.tv_sec-starttime.tv_sec)<<" s,  "<<(endtime.tv_usec-starttime.tv_usec)<<" us"<<endl;

    cout<<"0-10"<<endl;
    copy(arr,arr+10, ostream_iterator<int>(cout, " ") );
    cout<<endl;
    cout<<"90-100"<<endl;
    copy(arr+90,arr+100, ostream_iterator<int>(cout, " ") );

    return 0;

}

运行：

chen@chen-book1:~$ time ./count_2
44 s,  747807 us
0-10
1073741822 1073741791 1073741788 1073741787 1073741783 1073741782 1073741777 1073741771 1073741756 1073741754 
90-100
1073740859 1073740858 1073740844 1073740841 1073740837 1073740832 1073740826 1073740826 1073740811 1073740789 
real	0m47.247s
user	0m46.807s
sys	0m0.340s

先快排再取前10000，用时44s。要是换成堆排序呢？把sort改成make_heap加sort_heap（sort_heap要求区间已经是一个堆，所以要先make_heap），运行：

chen@chen-book1:~$ time ./count_2
67 s, -486231 us
0-10
1073741822 1073741791 1073741788 1073741787 1073741783 1073741782 1073741777 1073741771 1073741756 1073741754
90-100
1073740859 1073740858 1073740844 1073740841 1073740837 1073740832 1073740826 1073740826 1073740811 1073740789
real   1m9.013s
user   1m8.540s
sys   0m0.340s

用了67s。快排的确是N logN排序算法里最快的。

【5】部分排序。STL里有，基于堆排序的，看看效果如何！

#include <iostream>
#include <fstream>
#include <iterator>
#include <sys/time.h>
#include <string.h>
#include <algorithm>
using namespace std;

// Problem size: N input values, of which the M largest are wanted.
const int N = 100 * 1000 * 1000;
const int M = 10 * 1000;
// Mask applied to every rand() result; keeps the low 30 bits.
const int x = (1 << 30) - 1;
// Input buffer, statically allocated.
int arr[N];

// Exchanges the values held by a and b.
inline void swap(int &a, int &b)
{
    const int first = a, second = b;
    a = second;
    b = first;
}
// Comparator ordering ints from largest to smallest.
// The comparison is strict (a > b): std::partial_sort requires a strict weak
// ordering, so cmp()(v, v) must be false.
struct cmp
{
   bool operator()(const int &a, const int &b) const {return a>b;}
};
// Benchmark 5: std::partial_sort.
// Fills arr with N pseudo-random values masked by x, lets partial_sort place
// the first M elements under cmp into arr[0..M), and prints the elapsed time
// followed by two sample ranges.
int main()
{
    // Generate the input.
    for(int *slot=arr; slot!=arr+N; ++slot)
        *slot = rand() & x;

    struct timeval t_begin, t_end;
    gettimeofday(&t_begin, 0);

    partial_sort(arr, arr+M, arr+N, cmp());

    gettimeofday(&t_end, 0);

    // Borrow one second so the microsecond difference is never negative.
    if(t_end.tv_usec < t_begin.tv_usec)
    {
        t_end.tv_sec -= 1;
        t_end.tv_usec += 1000000;
    }
    cout << (t_end.tv_sec - t_begin.tv_sec) << " s,  "
         << (t_end.tv_usec - t_begin.tv_usec) << " us" << endl;

    // Sample rows: elements 0..9 and 90..99, each followed by a space.
    cout << "0-10" << endl;
    for(int k=0; k<10; ++k)
        cout << arr[k] << " ";
    cout << endl;
    cout << "90-100" << endl;
    for(int k=90; k<100; ++k)
        cout << arr[k] << " ";

    return 0;
}

运行：

chen@chen-book1:~$ time ./count_2
0 s, 729513 us
0-10
1073741822 1073741791 1073741788 1073741787 1073741783 1073741782 1073741777 1073741771 1073741756 1073741754
90-100
1073740859 1073740858 1073740844 1073740841 1073740837 1073740832 1073740826 1073740826 1073740811 1073740789
real   0m3.282s
user   0m2.896s
sys   0m0.312s

用了730ms，很不错了！可是为什么我自己用堆排序来模拟，就要那么久呢！！

PS：当N为4亿时，时间为2.65s。

【6】类似计数排序。按最高比特位统计：先申请长度为32的数组，第0个元素表示最高比特位为第0比特的元素个数，第31个元素表示最高比特位为第31比特的元素个数。

遍历数组，得到了直方图。然后从后向前算，就可以知道，最高比特位为多少的整数，是肯定在top10000里的，例如，算得最高比特位为20的整数，肯定在top 10000里，

而最高比特位为19的整数，则有一部分是，有一部分不是。于是，再遍历一遍，将最高比特位为20的放在整个数组开头，再将最高比特位为19的紧随其后，再对最高比特位为19的这部分做部分排序即可！

#include <iostream>
#include <fstream>
#include <iterator>
#include <sys/time.h>
#include <string.h>
#include <algorithm>
using namespace std;

// Problem size: N input values, of which the M largest are wanted.
const int N = 100 * 1000 * 1000;
const int M = 10 * 1000;
// Mask applied to every rand() result; keeps the low 30 bits.
const int x = (1 << 30) - 1;
// Input buffer, statically allocated.
int arr[N];

// Exchanges the values held by a and b.
inline void swap(int &a, int &b)
{
    const int first = a, second = b;
    a = second;
    b = first;
}
// Comparator ordering ints from largest to smallest.
// The comparison is strict (a > b): std::partial_sort requires a strict weak
// ordering, so cmp()(v, v) must be false.
struct cmp
{
   bool operator()(const int &a, const int &b) const {return a>b;}
};
// Returns the zero-based index of the highest set bit of x.
// Both 0 and 1 yield 0. The value is shifted as unsigned so the loop also
// terminates for negative x (an arithmetic right shift of a negative int
// never reaches zero); a negative x reports the sign bit's index.
int hbit(int x)
{
    unsigned int v = static_cast<unsigned int>(x);
    int i=0;
    while(v>>=1)i++;
    return i;
}
// Benchmark 6: bucket by highest set bit, then partially sort one bucket.
// A histogram over hbit() finds the bucket b at which the running count,
// taken from the highest bucket downwards, first exceeds M. Values in buckets
// above b all belong to the answer and are moved to the front; bucket b is
// moved right behind them and only that bucket is partially sorted. On exit
// arr[0..M) holds the M largest values in descending order.
// The bucket thresholds assume the values are non-negative.
int main()
{

    int i=0;
    for(i=0; i<N;i++)
        arr[i]=rand() & x;

    int bits[32]={0};   // bits[k] = number of values whose hbit() is k
    struct timeval starttime,endtime;
    gettimeofday(&starttime,0);

    //

    for(i=0; i<N; i++)
    {
        bits[ hbit(arr[i]) ]++;
    }

    // Accumulate bucket sizes from the top until more than M values are covered.
    int s=0;
    int b=32-1;
    for(; b>=0; b--)
    {
        s+=bits[b];
        if(s>M)break;
    }

    if(b<0)
    {
        // The loop never broke, so there are at most M values in total:
        // every value belongs to the answer. Sort everything, largest first.
        sort(arr, arr+N);
        reverse(arr, arr+N);
    }
    else
    {
        int s0=s;       // number of values in buckets >= b
        s-=bits[b];     // number of values in buckets >  b
        // long long so that 1 << (b+1) cannot overflow for b >= 30.
        long long threshold = 1LL<<(b+1);

        // Pass 1: move every value >= threshold to the front of the array.
        // arr[0..i) holds the values found so far.
        i=0;
        while(i<N && arr[i]>=threshold) ++i;
        for(int j=i+1; j<N; j++)
        {
            if(arr[j]>=threshold)
            {
                swap(arr[i], arr[j]);
                ++i;
            }
        }

        threshold>>=1;
        if(s<M)
        {
            // Pass 2: gather bucket b directly behind the values from pass 1,
            // then order only as many of its values as are still needed.
            while(i<N && arr[i]>=threshold) ++i;
            for(int j=i+1; j<N; j++)
            {
                if(arr[j]>=threshold)
                {
                    swap(arr[i], arr[j]);
                    ++i;
                }
            }
            partial_sort(arr+s, arr+M, arr+s0, cmp());
        }

        // The values gathered in pass 1 are in arbitrary order; put them in
        // descending order too so that the whole of arr[0..M) is sorted.
        sort(arr, arr+s);
        reverse(arr, arr+s);
    }
    //
    gettimeofday(&endtime,0);
    // Borrow one second so the microsecond difference is never negative.
    if(endtime.tv_usec<starttime.tv_usec)
    {
        endtime.tv_sec--;
        endtime.tv_usec+=1000000;
    }
    cout<<(endtime.tv_sec-starttime.tv_sec)<<" s,  "<<(endtime.tv_usec-starttime.tv_usec)<<" us"<<endl;

    cout<<"0-10"<<endl;
    copy(arr,arr+10, ostream_iterator<int>(cout, " ") );
    cout<<endl;
    cout<<"90-100"<<endl;
    copy(arr+90,arr+100, ostream_iterator<int>(cout, " ") );

    return 0;

}

运行：

chen@chen-book1:~$ time ./count_2
13 s, 863604 us
0-10
1073741822 1073741791 1073741788 1073741787 1073741783 1073741782 1073741777 1073741771 1073741756 1073741754
90-100
1073740859 1073740858 1073740844 1073740841 1073740837 1073740832 1073740826 1073740826 1073740811 1073740789
real   0m16.352s
user   0m15.865s
sys   0m0.420s

13秒。怎么会这么久。。可能是移动的地方太多了。。

PS：N=4亿时，时间为55s。

【7】nth_element(arr,arr+M,arr+N, greater<int>() );

耗时：2.9s。

综上，第一名：对1万个元素的小数组维持有序（二分查找插入）；

第二名：partial_sort based on heap

纯属虚构

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
1亿个整数求top 10000

参考资料：http://bbs.csdn.net/topics/250038051【1】遍历数组，求出最大值，与arr[0]元素交换，再在arr[1]到arr[100000000-1]之间求最大值，与arr[1]交换。。类似选择排序。#include #include #include #include #include using namespace
复制链接

扫一扫

专栏目录