多路归并对1000 万无序数排序(外排序)

  算法思路:首先根据内存限制将待排序文件分成k份,循环读入每一份,分别排序后写入k个临时文件。然后同时打开这k个文件,读出每个文件的第一个数字,找出其中最小的数字写到result.txt文件中,并让这个最小数字所在的文件读取下一个数字;继续在k个文件的当前数字中找最小值,直到k个文件都被读取完为止。此时result.txt中就是最终排好序的结果。

多路归并:k 个分块各含 n/k 个数,分块排序的时间复杂度为 $O\left(k \cdot \frac{n}{k} \cdot \log\frac{n}{k}\right)$。这种方法由于频繁进行IO操作,效率相对位图方法低一些。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <algorithm>
#include <iostream>
#include <string>
using namespace std;  
  
// Declared size of the data set (ten million); no function visible in this
// file reads it.
int sort_num = 10000000;
// Upper bound on how many integers read_data() loads per call, which is also
// the length of the buffer memory_sort() allocates for each chunk.
int memory_size = 250000;
  
// Reads the next chunk of integers from fp into space.
//
// At most memory_size values (250k by default) are read per call so that only
// one chunk has to be held in memory at a time.
//
// fp:    input stream opened for reading, positioned at the next number.
// space: destination buffer with room for at least memory_size ints.
// Returns the number of integers actually stored; 0 means no further number
// could be read.
int read_data(FILE *fp, int *space)
{
    int index = 0;
    // Require a successful conversion (== 1) instead of merely "not EOF":
    // fscanf returns 0 on a non-numeric token without consuming it, which
    // would otherwise be counted as data and would never reach end of file.
    while (index < memory_size && fscanf(fp, "%d ", &space[index]) == 1)
        index++;
    return index;
}
  
// Writes the first num integers of space to fp as decimal text, each value
// followed by a single space.
//
// fp:    output stream opened for writing.
// space: array holding at least num values.
// num:   how many values to write; nothing is written when num <= 0.
void write_data(FILE *fp, int *space, int num)
{
    for (int pos = 0; pos < num; ++pos)
        fprintf(fp, "%d ", space[pos]);
}
  
// Verifies that a file was opened successfully.
//
// Returns normally when fp is non-null. When fp is null, prints a diagnostic
// to standard output and terminates the process with exit status 1.
void check_fp(FILE *fp)
{
    if (fp != NULL)
        return;

    std::cout << "The file pointer is invalid!" << std::endl;
    exit(1);
}
  
// qsort-style three-way comparator for ints.
//
// first_num / second_num: pointers to the two int values being compared.
// Returns a negative value, zero, or a positive value when the first value is
// respectively less than, equal to, or greater than the second.
int compare(const void *first_num, const void *second_num)
{
    const int lhs = *static_cast<const int *>(first_num);
    const int rhs = *static_cast<const int *>(second_num);
    // Do not return lhs - rhs: the subtraction overflows (undefined behaviour)
    // for operands such as INT_MIN and 1, producing the wrong sign.
    return (lhs > rhs) - (lhs < rhs);
}
  
// Builds the name of the n-th auxiliary chunk file: "data<n>.txt".
//
// n: chunk number to embed in the name.
// Returns the file name as a string, e.g. "data3.txt" for n == 3.
string new_file_name(int n)
{
    // 12 bytes hold the longest 32-bit decimal ("-2147483648") plus the NUL.
    char digits[12];
    sprintf(digits, "%d", n);

    string result("data");
    result += digits;
    result += ".txt";
    return result;
}
  
// Phase 1 of the external sort: split "data.txt" into sorted chunk files.
//
// Repeatedly reads up to memory_size integers from "data.txt", sorts them in
// memory in ascending order, and writes each sorted chunk to its own auxiliary
// file named by new_file_name(1), new_file_name(2), ...
//
// Returns the number of auxiliary files written (0 if the input holds no
// numbers). Terminates the process through check_fp() if the input file or an
// auxiliary file cannot be opened.
int memory_sort()
{
    // open the target file.
    FILE *fp_in_file = fopen("data.txt", "r");
    check_fp(fp_in_file);

    // One buffer is reused for every chunk. Allocating inside the loop leaked
    // the final buffer, because the loop was left before its delete[] ran.
    int *space = new int[memory_size];
    int counter = 0;
    int num;
    // A chunk of zero numbers means the input is exhausted.
    while ((num = read_data(fp_in_file, space)) > 0)
    {
        // Sort the chunk in ascending order.
        std::sort(space, space + num);

        // create a new auxiliary file name.
        string file_name = new_file_name(++counter);
        FILE *fp_aux_file = fopen(file_name.c_str(), "w");
        check_fp(fp_aux_file);

        // write the ordered numbers into the auxiliary file.
        write_data(fp_aux_file, space, num);
        fclose(fp_aux_file);
    }
    delete [] space;
    fclose(fp_in_file);

    // return the number of auxiliary files.
    return counter;
}
  
// Phase 2 of the external sort: k-way merge of the sorted chunk files.
//
// Opens the auxiliary files new_file_name(1) .. new_file_name(file_num), keeps
// the current head value of each one, and repeatedly writes the smallest head
// to "result.txt", refilling that slot from the file it came from.
//
// file_num: number of auxiliary files to merge; nothing is done (and
//           "result.txt" is not created) when it is <= 0.
// Terminates the process through check_fp() if any file cannot be opened.
void merge_sort(int file_num)
{
    if (file_num <= 0)
        return;
    // create a new file to store result.
    FILE *fp_out_file = fopen("result.txt", "w");
    check_fp(fp_out_file);

    // allocate an array to store the file pointers.
    FILE **fp_array = new FILE *[file_num];
    int i;
    for (i = 0; i < file_num; i++)
    {
        string file_name = new_file_name(i + 1);
        fp_array[i] = fopen(file_name.c_str(), "r");
        check_fp(fp_array[i]);
    }

    // first_data[i] is the current head value of file i;
    // finish[i] records that file i has no more numbers.
    int *first_data = new int[file_num];
    bool *finish = new bool[file_num];

    // read the first number of every auxiliary file; a file that yields no
    // number is marked finished immediately so its slot is never consulted.
    for (i = 0; i < file_num; i++)
        finish[i] = (fscanf(fp_array[i], "%d ", &first_data[i]) != 1);

    while (true)
    {
        // locate the first file that still has data.
        int index = 0;
        while (index < file_num && finish[index])
            index++;

        // the merge is complete only when every file is finished.
        if (index >= file_num)
            break;

        // choose the minimum among the heads of the unfinished files.
        int min_data = first_data[index];
        for (i = index + 1; i < file_num; i++)
        {
            if (!finish[i] && first_data[i] < min_data)
            {
                min_data = first_data[i];
                // Remember which file supplied the minimum, so that the
                // refill below reads from that same file. Without this the
                // wrong file is advanced and the output is corrupted.
                index = i;
            }
        }

        // write the ordered result to file.
        fprintf(fp_out_file, "%d ", min_data);
        // refill the consumed slot; anything other than one successful
        // conversion (EOF or a malformed token) ends that file.
        if (fscanf(fp_array[index], "%d ", &first_data[index]) != 1)
            finish[index] = true;
    }

    fclose(fp_out_file);
    delete [] finish;
    delete [] first_data;
    for (i = 0; i < file_num; i++)
        fclose(fp_array[i]);
    delete [] fp_array;
}
  
int main()  
{  
    clock_t start_memory_sort = clock();  
    int aux_file_num = memory_sort();  
    clock_t end_memory_sort = clock();  
    cout << "The time needs in memory sort: " << end_memory_sort - start_memory_sort << endl;  
    clock_t start_merge_sort = clock();  
    merge_sort(aux_file_num);  
    clock_t end_merge_sort = clock();  
    cout << "The time needs in merge sort: " << end_merge_sort - start_merge_sort << endl;  
    system("pause");  
    return 0;  
}  



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值