外排序之文件归并排序实现

最新推荐文章于 2024-10-06 14:10:45 发布

Novice grey

最新推荐文章于 2024-10-06 14:10:45 发布

阅读量433

点赞数 6

文章标签：算法数据结构归并排序排序算法 c语言

本文链接：https://blog.csdn.net/m0_74251523/article/details/140748605

版权

在处理大数据集合时，内存限制往往会成为排序任务的瓶颈。传统的排序算法如快速排序、归并排序等，在数据量超出可用内存时不再适用，这就需要采用外排序技术。外排序的文件归并排序是一种处理大文件排序的有效方法，它通过将大文件分成多个小块，分别排序后合并以实现整体排序。在这篇博客中，我们将深入探讨如何用C语言实现外排序的文件归并排序算法。我们会详细讨论数据的划分、子文件的生成、排序以及最终的合并过程，并且会提供一些优化技术来提升效率，比如使用多路归并排序和优化I/O操作。这篇文章旨在为读者提供深入理解外排序概念的机会，同时展示如何在C语言环境中实现这一过程。

文章目录

前言
创建随机数据文件的代码
文件归并排序思路分析
文件归并排序代码实现
总结

前言

外排序介绍

外排序（External sorting）是指能够处理极大量数据的排序算法。通常来说，外排序处理的数据不能一次装入内存，只能放在读写较慢的外存储器(通常是硬盘)上。外排序通常采用的是一种“排序-归并”的策略。在排序阶段，先读入能放在内存中的数据量，将其排序输出到一个临时文件，依此进行，将待排序数据组织为多个有序的临时文件。然后在归并阶段将这些临时文件组合为一个大的有序文件，也即排序结果。跟外排序对应的就是内排序，之前讲的常见的排序，都是内排序，它们排序思想适应的是数据在内存中，支持随机访问。归并排序的思想不需要随机访问数据，只需要依次按序列读取数据，所以归并排序既是一个内排序，也是一个外排序。

一、创建随机数据文件的代码

/*
 * Write 1,000,000 pseudo-random non-negative integers to "data.txt",
 * one per line, truncating any existing file.
 *
 * Each value is rand() plus the loop index. On failure to open the file the
 * error is reported with perror() and nothing is written.
 */
void CreateNDate(void)
{
	int n = 1000000;
	/* All bits set except the top one: clears the sign bit of a wrapped sum. */
	const unsigned non_negative_mask = ~0u >> 1;

	srand((unsigned)time(NULL));
	const char* file = "data.txt";
	FILE* fin = fopen(file, "w");
	if (fin == NULL)
	{
		perror("fopen error");
		return;
	}

	for (int i = 0; i < n; ++i)
	{
		/*
		 * rand() + i overflows int when RAND_MAX equals INT_MAX, and signed
		 * overflow is undefined behavior. Add in unsigned arithmetic (which
		 * wraps) and mask the result back into the non-negative int range.
		 * Sums that did not overflow are unchanged.
		 */
		unsigned sum = (unsigned)rand() + (unsigned)i;
		int x = (int)(sum & non_negative_mask);
		fprintf(fin, "%d\n", x);
	}

	/* fclose flushes buffered output, so this is where a full disk shows up. */
	if (fclose(fin) != 0)
	{
		perror("fclose error");
	}
}

二、文件归并排序思路分析

1. 读取n个值排序后写到file1，再读取n个值排序后写到file2

2. file1和file2利用归并排序的思想，依次读取比较，取小的尾插到mfile，mfile归并为一个有序文件

3. 将file1和file2删掉，mfile重命名为file1

4. 再次读取n个数据排序后写到file2

5. 继续走file1和file2归并，重复步骤2，直到文件中无法读出数据。最后归并出的有序数据放到了 file1中

三、文件归并排序代码实现

#define _CRT_SECURE_NO_WARNINGS 1
#include<stdio.h>
#include<time.h>
#include<stdlib.h>

/*
 * Write 10,000,000 pseudo-random non-negative integers to "data.txt",
 * one per line, truncating any existing file.
 *
 * Each value is rand() plus the loop index. On failure to open the file the
 * error is reported with perror() and nothing is written.
 */
void CreateNDate(void)
{
    int n = 10000000;
    /* All bits set except the top one: clears the sign bit of a wrapped sum. */
    const unsigned non_negative_mask = ~0u >> 1;

    srand((unsigned)time(NULL));
    const char* file = "data.txt";
    FILE* fin = fopen(file, "w");
    if (fin == NULL)
    {
        perror("fopen error");
        return;
    }

    for (int i = 0; i < n; ++i)
    {
        /*
         * rand() + i overflows int when RAND_MAX equals INT_MAX, and signed
         * overflow is undefined behavior. Add in unsigned arithmetic (which
         * wraps) and mask the result back into the non-negative int range.
         * Sums that did not overflow are unchanged.
         */
        unsigned sum = (unsigned)rand() + (unsigned)i;
        int x = (int)(sum & non_negative_mask);
        fprintf(fin, "%d\n", x);
    }

    /* fclose flushes buffered output, so this is where a full disk shows up. */
    if (fclose(fin) != 0)
    {
        perror("fclose error");
    }
}

/*
 * qsort() comparator ordering ints ascending.
 *
 * Returns a negative value, zero, or a positive value when *a is less than,
 * equal to, or greater than *b. Uses comparisons rather than subtraction:
 * a - b overflows (undefined behavior, wrong sign) when the operands are far
 * apart, e.g. INT_MIN and 1.
 */
int compare(const void* a, const void* b)
{
    int lhs = *(const int*)a;
    int rhs = *(const int*)b;

    return (lhs > rhs) - (lhs < rhs);
}

/*
 * Read up to n integers from the text stream fout, sort them ascending and
 * write them, one per line, to the file named file1 (truncating it).
 *
 * fout  - open stream to read from (an input stream despite its name); it is
 *         left positioned after the last value consumed.
 * n     - maximum number of integers to read.
 * file1 - path of the file that receives the sorted values.
 *
 * Returns the number of integers read and written. Returns 0 when no integer
 * could be read (end of data), when n <= 0, or when allocation, opening or
 * writing the output file fails; failures are reported with perror().
 */
int ReadNDataSortToFile(FILE* fout, int n, const char* file1)
{
    /* A non-positive count would turn into a huge size_t in malloc. */
    if (n <= 0)
    {
        return 0;
    }

    int* a = (int*)malloc(sizeof(int) * (size_t)n);
    if (a == NULL)
    {
        perror("malloc error");
        return 0;
    }

    /*
     * Stop on anything other than a successful conversion. fscanf returns 0
     * (not EOF) on a non-numeric token and leaves x unchanged, so testing
     * only for EOF would store the previous value over and over.
     */
    int j = 0;
    int x = 0;
    while (j < n && fscanf(fout, "%d", &x) == 1)
    {
        a[j++] = x;
    }

    if (j == 0)
    {
        free(a);
        return 0;
    }

    qsort(a, (size_t)j, sizeof(int), compare);

    FILE* fin = fopen(file1, "w");
    if (fin == NULL)
    {
        /* Report before free() so errno still belongs to fopen. */
        perror("fopen error");
        free(a);
        return 0;
    }

    int write_failed = 0;
    for (int i = 0; i < j; i++)
    {
        if (fprintf(fin, "%d\n", a[i]) < 0)
        {
            write_failed = 1;
            break;
        }
    }

    free(a);

    /* fclose flushes, so a short write can also surface here. */
    if (fclose(fin) != 0)
    {
        write_failed = 1;
    }

    if (write_failed)
    {
        perror("write error");
        return 0;
    }

    return j;
}

/*
 * Merge two text files of ascending integers into one ascending file.
 *
 * file1, file2 - paths of the input files, each holding whitespace-separated
 *                integers already in ascending order.
 * mfile        - path of the output file; created or truncated, and written
 *                one integer per line.
 *
 * If any file cannot be opened the error is reported with perror(), every
 * stream opened so far is closed, and the function returns without merging.
 * Reading from an input stops at end of file or at the first token that is
 * not an integer.
 */
void MergeFile(const char* file1, const char* file2, const char* mfile)
{
    FILE* fout1 = fopen(file1, "r");
    if (fout1 == NULL)
    {
        perror("fopen error");
        return;
    }

    FILE* fout2 = fopen(file2, "r");
    if (fout2 == NULL)
    {
        perror("fopen error");
        fclose(fout1);
        return;
    }

    FILE* mfin = fopen(mfile, "w");
    if (mfin == NULL)
    {
        perror("fopen error");
        fclose(fout2);
        fclose(fout1);
        return;
    }

    /*
     * has1/has2 say whether x1/x2 currently hold an unwritten value. Compare
     * fscanf against 1 rather than EOF: a matching failure returns 0 forever
     * without consuming input, which would otherwise spin these loops
     * endlessly while rewriting the same number.
     */
    int x1 = 0;
    int x2 = 0;
    int has1 = (fscanf(fout1, "%d", &x1) == 1);
    int has2 = (fscanf(fout2, "%d", &x2) == 1);

    while (has1 && has2)
    {
        if (x1 < x2)
        {
            fprintf(mfin, "%d\n", x1);
            has1 = (fscanf(fout1, "%d", &x1) == 1);
        }
        else
        {
            fprintf(mfin, "%d\n", x2);
            has2 = (fscanf(fout2, "%d", &x2) == 1);
        }
    }

    /* At most one input still has values; copy whatever is left. */
    while (has1)
    {
        fprintf(mfin, "%d\n", x1);
        has1 = (fscanf(fout1, "%d", &x1) == 1);
    }

    while (has2)
    {
        fprintf(mfin, "%d\n", x2);
        has2 = (fscanf(fout2, "%d", &x2) == 1);
    }

    fclose(fout1);
    fclose(fout2);
    fclose(mfin);
}

/*
 * Driver: generate data.txt, then sort it with a two-file external merge.
 *
 * The input is consumed in chunks of m integers. The first sorted chunk goes
 * to file1; every later chunk is sorted into file2, merged with file1 into
 * mfile, and mfile then replaces file1. When the input is exhausted the fully
 * sorted data is in file1.
 *
 * Returns 0 on success and 1 if data.txt cannot be opened or a merge/rename
 * step fails.
 */
int main()
{
    CreateNDate();

    const char* file1 = "file1.txt";
    const char* file2 = "file2.txt";
    const char* mfile = "mfile.txt";

    FILE* fout = fopen("data.txt", "r");
    if (fout == NULL)
    {
        perror("fopen error");
        return 1;
    }

    int m = 1000000;
    int status = 0;

    /* No data at all: nothing to sort and no file1 to produce. */
    if (ReadNDataSortToFile(fout, m, file1) == 0)
    {
        fclose(fout);
        return 0;
    }

    /*
     * Only merge when another chunk was actually read into file2. If the
     * whole input fit in the first chunk, file1 is already the result and
     * must not be deleted.
     */
    while (ReadNDataSortToFile(fout, m, file2) > 0)
    {
        /* Drop any stale output so the existence check below is meaningful. */
        remove(mfile);
        MergeFile(file1, file2, mfile);

        /* MergeFile returns nothing; confirm it produced mfile before the
         * inputs are deleted. */
        FILE* probe = fopen(mfile, "r");
        if (probe == NULL)
        {
            perror("merge error");
            status = 1;
            break;
        }
        fclose(probe);

        /* Remove the inputs first: rename() may fail if the target exists. */
        remove(file1);
        remove(file2);

        if (rename(mfile, file1) != 0)
        {
            perror("rename error");
            status = 1;
            break;
        }
    }

    fclose(fout);
    return status;
}