多路归并对1000 万无序数排序（外排序）

最新推荐文章于 2024-02-26 10:23:10 发布

v_yang_guang_v

最新推荐文章于 2024-02-26 10:23:10 发布

阅读量565

点赞数

分类专栏： july程序员编程艺术自写

本文链接：https://blog.csdn.net/v_yang_guang_v/article/details/46685095

版权

july程序员编程艺术自写专栏收录该内容

12 篇文章 0 订阅

订阅专栏

算法思路：首先根据内存限制将待排序文件分成 k 份，然后依次读入这 k 个小文件，分别在内存中排序后写回。接着同时打开这 k 个已排序的文件，读出每个文件的第一个数字，找出其中最小的数字写到 result.txt 文件中，并让这个具有最小数字的文件读取下一个数字；继续在 k 个文件的当前数字中找最小值，直到 k 个文件都被读取完为止。此时 result.txt 中就是最后排完序的结果。

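多路归并：分块排序阶段的时间复杂度为 $O\left(k \cdot \frac{n}{k} \log \frac{n}{k}\right) = O\left(n \log \frac{n}{k}\right)$；归并阶段每输出一个数都要在 k 个文件的当前数字中线性查找最小值，时间复杂度为 $O(nk)$。这种方法由于频繁进行 IO 操作，效率相对位图方法低一些。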

#include <time.h>

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
  
// Size of the data set in integers (10 million). Not referenced by any of the
// code visible in this file.
int sort_num = 10000000;  
// Upper bound on how many integers are held in memory at once: read_data()
// stops after this many values and memory_sort() sizes its buffer with it.
int memory_size = 250000;    
  
// Reads up to memory_size whitespace-separated integers from fp into space.
//
// fp    - stream opened for reading.
// space - destination buffer with room for at least memory_size ints.
//
// Returns the number of integers actually stored (0 when nothing more can be
// read).
//
// The loop only continues while fscanf reports exactly one successful
// conversion. Comparing against EOF is not enough: on a non-numeric token
// fscanf returns 0 without consuming input, which would make the loop count
// unread slots as data and never report end of input to the caller.
int read_data(FILE *fp, int *space)
{
    int index = 0;
    while (index < memory_size && fscanf(fp, "%d ", &space[index]) == 1)
        index++;
    return index;
}
  
// Writes the first num integers of space to fp, each one followed by a single
// space character.
//
// fp    - stream opened for writing.
// space - source array holding at least num values.
// num   - how many values to write; nothing is written when num <= 0.
void write_data(FILE *fp, int *space, int num)
{
    for (int pos = 0; pos < num; ++pos)
        fprintf(fp, "%d ", space[pos]);
}
  
// Validates a FILE pointer returned by fopen.
//
// Returns normally when fp is non-null. When fp is null, prints a diagnostic
// to standard output and terminates the process with exit status 1.
void check_fp(FILE *fp)
{
    if (fp != NULL)
        return;

    std::cout << "The file pointer is invalid!" << std::endl;
    exit(1);
}
  
// qsort-style three-way comparator for int values.
//
// first_num, second_num - pointers to the two ints being compared.
//
// Returns a negative value, zero, or a positive value when the first int is
// respectively less than, equal to, or greater than the second.
//
// The result is built from two comparisons rather than a subtraction:
// "a - b" overflows (undefined behaviour, and in practice the wrong sign) for
// operands far apart, e.g. INT_MIN and 1.
int compare(const void *first_num, const void *second_num)
{
    const int lhs = *static_cast<const int *>(first_num);
    const int rhs = *static_cast<const int *>(second_num);
    return (lhs > rhs) - (lhs < rhs);
}
  
// Builds the name of the n-th auxiliary run file: "data<n>.txt".
//
// n - run number to embed in the name.
//
// Returns the file name as a string.
string new_file_name(int n)
{
    // "data" + up to 11 characters for a 32-bit int + ".txt" + NUL fits in 20.
    char buffer[20];
    snprintf(buffer, sizeof(buffer), "data%d.txt", n);
    return std::string(buffer);
}
  
int memory_sort()  
{  
    // open the target file.  
    FILE *fp_in_file = fopen("data.txt", "r");  
    check_fp(fp_in_file);  
    int counter = 0;  
    while (true)  
    {  
        // allocate space to store data read from file.  
        int *space = new int[memory_size];  
        int num = read_data(fp_in_file, space);  
        // the memory sort have finished if not numbers any more.  
        if (num == 0)  
            break;  
  
        // quick sort.  
        qsort(space, num, sizeof(int), compare);  
        // create a new auxiliary file name.  
        string file_name = new_file_name(++counter);  
        FILE *fp_aux_file = fopen(file_name.c_str(), "w");  
        check_fp(fp_aux_file);  
  
        // write the orderly numbers into auxiliary file.  
        write_data(fp_aux_file, space, num);  
        fclose(fp_aux_file);  
        delete []space;  
    }  
    fclose(fp_in_file);  
  
    // return the number of auxiliary files.  
    return counter;  
}  
  
void merge_sort(int file_num)  
{  
    if (file_num <= 0)  
        return;  
    // create a new file to store result.  
    FILE *fp_out_file = fopen("result.txt", "w");  
    check_fp(fp_out_file);  
  
    // allocate a array to store the file pointer.  
    FILE **fp_array = new FILE *[file_num];  
    int i;  
    for (i = 0; i < file_num; i++)  
    {  
        string file_name = new_file_name(i + 1);  
        fp_array[i] = fopen(file_name.c_str(), "r");  
        check_fp(fp_array[i]);  
    }  
  
    int *first_data = new int[file_num];     
    //new出个大小为0.1亿/250k数组，由指针first_data指示数组首地址  
    bool *finish = new bool[file_num];  
    memset(finish, false, sizeof(bool) * file_num);  
  
    // read the first number of every auxiliary file.  
    for (i = 0; i < file_num; i++)  
        fscanf(fp_array[i], "%d ", &first_data[i]);  
    while (true)  
    {  
        int index = 0;  
        while (index < file_num && finish[index])  
            index++;  
  
        // the finish condition of the merge sort.  
        if (index >= file_num)  
            break;  
        //主要的修改在上面两行代码，就是merge sort结束条件。  
        //要保证所有文件都读完，必须使得finish[0]...finish[40]都为真  
        //July、yansha，555，2011.05.29。  
  
        int min_data = first_data[index];  
        // choose the relative minimum in the array of first_data.  
        for (i = index + 1; i < file_num; i++)  
        {  
            if (min_data > first_data[i] && !finish[i])     
                //一旦发现比min_data更小的数据first_data[i]  
            {  
                min_data = first_data[i];      
                //则置min_data<-first_data[i]index = i;                     
                //把下标i 赋给index。  
            }  
        }  
  
        // write the orderly result to file.  
        fprintf(fp_out_file, "%d ", min_data);  
        if (fscanf(fp_array[index], "%d ", &first_data[index]) == EOF)  
            finish[index] = true;  
    }  
  
    fclose(fp_out_file);  
    delete []finish;  
    delete []first_data;  
    for (i = 0; i < file_num; i++)  
        fclose(fp_array[i]);  
    delete [] fp_array;  
}  
  
int main()  
{  
    clock_t start_memory_sort = clock();  
    int aux_file_num = memory_sort();  
    clock_t end_memory_sort = clock();  
    cout << "The time needs in memory sort: " << end_memory_sort - start_memory_sort << endl;  
    clock_t start_merge_sort = clock();  
    merge_sort(aux_file_num);  
    clock_t end_merge_sort = clock();  
    cout << "The time needs in merge sort: " << end_merge_sort - start_merge_sort << endl;  
    system("pause");  
    return 0;  
}

v_yang_guang_v

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
多路归并对1000 万无序数排序（外排序）

算法思路：首先根据内存要求将排序文件分成k份，然后循环读这k个文件分别进行排序。然后用同时打开这k个文件，读出每个文件的第一个数字，找出其中最小的数字写到result.txt文件中并且让这个具有最小数字的文件读取下一个数字，继续找k个文件中的最小数字，直到k个文件都被读取完为止，result 中就是最后排完序的结果。多路归并，时间复杂度为O（k*n/k*logn/k ）这种方法由于频繁进行
复制链接

扫一扫