纯C实现大数据量浮点数排序

纯C实现大数据量浮点数排序

同学遇到的一道笔试题
仅仅是练习一下,做法可能不是最优。
用纯C实现,写得比较难受。

  • 分析:数据量比较大, 栈空间有限,无法一次性内排序,需要使用外排序。
  • 以下是我的做法:将大数据切分成若干块,每块数据快排后写到临时文件里,然后对这些临时文件做多路归并(每次从各路的当前元素中选出最小值输出)。
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>

/* Capacity of the in-memory sort buffer `cache`, counted in doubles. */
#define CACHE_SIZE 1000010
/* Capacity of `filesHandle`: how many bunch files can be open at once while merging. */
#define MAX_HANNDLE 300
/* Buffer that holds the numbers of the bunch currently being read and sorted. */
double cache[CACHE_SIZE];
/* Number of valid entries at the front of `cache`. */
int cacheUse = 0;
/* Tolerance handed to the floating-point comparison helpers. */
const double EPS = 1e-8;
/* Number of values gathered in `cache` before a bunch is sorted and written to a temp file. */
const int BUNCH_SIZE = 1e5;

/* Open streams of the bunch files being merged; a NULL entry marks a bunch that is exhausted. */
FILE* filesHandle[MAX_HANNDLE];
/* Path of the temp file for the current bunch; filled in by getDstPath(). */
char dst[100];
/* Number of paths stored in `bunches`. */
int bunchCount = 0;
/* Heap-allocated paths of the sorted temp files (allocated in recordBunch, freed in clearBunches). */
char* bunches[500];
/* Number of entries of `filesHandle` that were opened for the merge. */
int handleCount = 0;

/*
 * Exchange the two doubles that v1 and v2 point to.
 * Passing the same address twice leaves the value unchanged.
 */
void mySwap(double* v1, double* v2)
{
    const double first = *v1;
    const double second = *v2;

    *v1 = second;
    *v2 = first;
}

/*
 * Approximate equality test.
 * Returns 1 when the absolute difference of a and b does not exceed eps,
 * 0 otherwise.
 */
int isEqual(double a, double b, double eps)
{
    const double gap = fabs(a - b);

    if (gap <= eps)
    {
        return 1;
    }
    return 0;
}

/*
 * Tolerant "less than" test.
 * Returns 1 when a - b is strictly below eps, 0 otherwise. With a positive
 * eps this also holds for equal values and for an `a` that exceeds `b` by
 * less than eps.
 * NOTE(review): the sort and merge code in this file is written against this
 * tolerant behaviour; confirm before tightening it to a strict comparison.
 */
int isLess(double a, double b, double eps)
{
    const double diff = a - b;

    if (diff < eps)
    {
        return 1;
    }
    return 0;
}

/*
 * Tolerant "greater than" test.
 * "a greater than b" is evaluated as "b less than a" with the same eps,
 * so the result is whatever isLess reports for the swapped operands.
 */
int isGreater(double a, double b, double eps)
{
    const int swappedIsLess = isLess(b, a, eps);

    return swappedIsLess;
}

/*
 * Tolerant "less than or equal" test.
 * Returns 1 when isLess(a, b, eps) holds or, failing that, when
 * isEqual(a, b, eps) holds; returns 0 otherwise.
 */
int isLessEqual(double a, double b, double eps)
{
    if (isLess(a, b, eps))
    {
        return 1;
    }
    if (isEqual(a, b, eps))
    {
        return 1;
    }
    return 0;
}

/*
 * Partition data[left..right] around the value that starts at data[left].
 *
 * A cursor scans down from the right past elements for which isGreater
 * holds against the pivot, another scans up from the left past elements for
 * which isLessEqual holds; whenever both stop before meeting, the two
 * elements are exchanged. When the cursors meet, the pivot is swapped into
 * the meeting position.
 *
 * Returns the index at which the pivot ends up.
 */
int partialSort(double* data, int left, int right)
{
    const int pivot = left;
    int lo = left;
    int hi = right;

    while (lo < hi)
    {
        // The right cursor has to move first so that the meeting point
        // holds a value that may legally be swapped to the front.
        for (; hi > lo && isGreater(data[hi], data[pivot], EPS); --hi)
        {
        }
        for (; lo < hi && isLessEqual(data[lo], data[pivot], EPS); ++lo)
        {
        }
        if (lo < hi)
        {
            mySwap(&data[lo], &data[hi]);
        }
    }

    mySwap(&data[pivot], &data[lo]);
    return lo;
}

/*
 * Sort data[left..right] (both bounds inclusive) in place with quicksort.
 *
 * partialSort returns the index where the pivot has been placed, so the
 * pivot itself is left out of both sub-ranges. Only the smaller sub-range is
 * handled recursively; the larger one is handled by the loop, which keeps
 * the recursion depth logarithmic in the range size even for already sorted
 * or duplicate-heavy input.
 *
 * A range with left >= right is left untouched.
 */
void quickSort(double* data, int left, int right)
{
    while (left < right)
    {
        int mid = partialSort(data, left, right);

        if (mid - left < right - mid)
        {
            quickSort(data, left, mid - 1);
            left = mid + 1;
        }
        else
        {
            quickSort(data, mid + 1, right);
            right = mid - 1;
        }
    }
}

/*
 * Print the first n values of data to standard output, one per line,
 * using the "%lf" format.
 */
void print(double* data, int n)
{
    int idx = 0;

    while (idx < n)
    {
        printf("%lf\n", data[idx]);
        ++idx;
    }
}

/*
 * Write n values from data to the text file at path, separated by single
 * spaces and terminated by one newline. An existing file is overwritten.
 * With n <= 0 the file contains only the newline.
 *
 * Returns 1 when the file was opened and every write, including the final
 * flush done by fclose, succeeded; returns 0 otherwise. A message is
 * printed to standard output on failure.
 */
int writeTo(const char* path, double* data, int n)
{
    // open file
    FILE* fp = fopen(path, "w");
    if(fp == NULL)
    {
        printf("open %s failed", path);
        return 0;
    }

    // write data
    int ok = 1;
    int i;
    for(i = 0; i < n && ok; ++i)
    {
        const char* separator = (i == 0) ? "" : " ";
        if(fprintf(fp, "%s%lf", separator, data[i]) < 0)
        {
            ok = 0;
        }
    }
    if(ok && fputc('\n', fp) == EOF)
    {
        ok = 0;
    }

    // close file; buffered data is flushed here, so a full disk may only
    // become visible through fclose's return value
    if(fclose(fp) != 0)
    {
        ok = 0;
    }
    if(!ok)
    {
        printf("write %s failed\n", path);
    }
    return ok;
}

/*
 * Debug helper: print every number stored in the text file at path, one per
 * line, followed by the count of numbers read.
 *
 * Reading stops at end of file or at the first token that cannot be parsed
 * as a number; testing only for EOF would never advance past such a token.
 *
 * Returns 0 when the file cannot be opened, 1 otherwise.
 */
int readFrom(const char* path)
{
    FILE* fp = fopen(path, "r");
    if(fp == NULL)
    {
        printf("open %s failed", path);
        return 0;
    }
    // read data
    double num = 0;
    int total = 0;
    while(fscanf(fp, "%lf", &num) == 1)
    {
        printf("%f\n", num);
        ++total;
    }
    printf("total = %d\n", total);
    // close file
    fclose(fp);
    return 1;
}

/*
 * Build the temp-file name for bunch `id` of the source file `path` and
 * store it in the global `dst`.
 *
 * The name is the part of path before its first '.', or the whole path when
 * it contains no '.', followed by "_<id>_sorted.txt". The result is always
 * NUL-terminated and is truncated when it does not fit into `dst`.
 */
void getDstPath(const char* path, int id)
{
    const char* pos = strchr(path, '.');
    size_t stemLen = (pos != NULL) ? (size_t)(pos - path) : strlen(path);

    // Keep the precision passed to "%.*s" within the destination's capacity.
    if(stemLen >= sizeof(dst))
    {
        stemLen = sizeof(dst) - 1;
    }
    snprintf(dst, sizeof(dst), "%.*s_%d_sorted.txt", (int)stemLen, path, id);
}

/*
 * Append a heap-allocated copy of the path `dst` to the global `bunches`
 * list and increment `bunchCount`. The copy is released by clearBunches().
 *
 * When the list is full or the allocation fails, a message is printed and
 * the program exits: carrying on would leave a sorted run out of the merge.
 */
void recordBunch(const char* dst)
{
    const size_t capacity = sizeof(bunches) / sizeof(bunches[0]);
    if(bunchCount < 0 || (size_t)bunchCount >= capacity)
    {
        printf("too many bunches, at most %d are supported\n", (int)capacity);
        exit(EXIT_FAILURE);
    }

    size_t dstLen = strlen(dst);
    char* newBunch = malloc(dstLen + 1);
    if(newBunch == NULL)
    {
        printf("out of memory while recording bunch %s\n", dst);
        exit(EXIT_FAILURE);
    }
    memcpy(newBunch, dst, dstLen + 1); // includes the terminating '\0'
    bunches[bunchCount++] = newBunch;
}

/*
 * Turn the numbers currently held in `cache` into one sorted temp file.
 *
 * The temp-file name is derived from src and the current bunch count, the
 * first cacheUse entries of `cache` are sorted, the name is recorded in the
 * bunch list and the sorted values are written out. Finally `cacheUse` is
 * reset so the cache can take the next bunch.
 */
void processBunch(const char* src)
{
    const int count = cacheUse;

    // Name the temp file after the source and the index of this bunch.
    getDstPath(src, bunchCount);
    printf("%s: nums total = %d\n", dst, count);

    // Sort in memory, then register and persist the sorted run.
    quickSort(cache, 0, count - 1);
    recordBunch(dst);
    writeTo(dst, cache, count);

    // The cache is empty again.
    cacheUse = 0;
}

/*
 * Read the numbers stored in the text file at path and split them into
 * sorted temp files of at most BUNCH_SIZE values each.
 *
 * Values are collected in the global `cache`; each time BUNCH_SIZE values
 * have been gathered, and once more for a non-empty remainder, processBunch
 * sorts and writes them out.
 *
 * Reading stops at end of file or at the first token that cannot be parsed
 * as a number; testing only for EOF would never advance past such a token
 * and would keep producing bunches forever.
 */
void splitBunch(const char* path)
{
    puts(path);
    FILE* fp = fopen(path, "r");
    if(fp == NULL)
    {
        printf("open %s failed", path);
        return;
    }
    // read data
    double num = 0;
    cacheUse = 0;
    while(fscanf(fp, "%lf", &num) == 1)
    {
        cache[cacheUse] = num;
        ++cacheUse;
        if(cacheUse == BUNCH_SIZE)
        {
            processBunch(path);
        }
    }
    if(!feof(fp))
    {
        printf("stop reading %s: invalid number or read error\n", path);
    }
    if(cacheUse > 0)
    {
        processBunch(path);
    }
    // close file
    fclose(fp);
    return;
}

/*
 * Write `total` pseudo-random test values to the text file at path, each
 * followed by a space. An existing file is overwritten.
 *
 * Every value is an integer from [1, maxData] multiplied by a factor from
 * {1.0, 1.1, ..., 1.9}. The generator is seeded from the current time on
 * each call.
 *
 * maxData must be positive; otherwise a message is printed and no file is
 * created (the modulo below would divide by zero).
 */
void generateData(const char* path, int maxData, int total)
{
    const int factorMax = 20;
    const int factorMin = 10;

    if(maxData <= 0)
    {
        printf("invalid maxData %d, must be positive\n", maxData);
        return;
    }

    srand((unsigned)time(NULL));
    puts(path);
    FILE* fp = fopen(path, "w");
    if(fp == NULL)
    {
        printf("open %s failed", path);
        return;
    }
    int i;
    for(i = 0; i < total; ++i)
    {
         int iNum = rand() % maxData + 1; // [1, maxData]
         int iFactor = rand() % (factorMax - factorMin) + factorMin; // [10, 20)
         double fFactor = iFactor / 10.0; // [1.0, 1.9]
         double fNum = iNum * fFactor;
         fprintf(fp, "%lf ", fNum);
    }
    printf("generate toatal = %d\n", total);
    // buffered output is flushed by fclose, so a failed write shows up here
    if(fclose(fp) != 0)
    {
        printf("write %s failed\n", path);
    }
}

/*
 * Stack-size probe: every call declares a 1 MiB local array, increments cnt,
 * prints it labelled as the stack size in MB and then calls itself again.
 * There is no base case, so the function never returns normally; presumably
 * it is meant to run until the stack is exhausted so that the last printed
 * value approximates the usable stack size. The only call site visible in
 * this file (in main) is commented out.
 */
void testStack(int cnt)
{
    // Never read afterwards; presumably present only to occupy stack space.
    // NOTE(review): an optimizing compiler may be able to drop it - confirm.
    char data[1024 * 1024] = {0};
    ++cnt;
    printf("%s %d stackSize = %d MB\n", __FUNCTION__, __LINE__, cnt);
    testStack(cnt);
}

/*
 * Delete every recorded temp file and release the bunch list.
 *
 * For each recorded path a message is printed, the file is removed (a
 * failure is reported) and the path string is freed. `bunchCount` is reset
 * to 0 afterwards so that a later call does not touch the freed entries.
 */
void clearBunches()
{
    int i;
    for(i = 0; i < bunchCount; ++i)
    {
        printf("all number sorted finished, remove temp file:%s\n", bunches[i]);
        if(remove(bunches[i]) != 0)
        {
            printf("remove %s failed\n", bunches[i]);
        }
        free(bunches[i]);
        bunches[i] = NULL;
    }
    bunchCount = 0;
}

/*
 * Choose which open bunch supplies the next value of the merge.
 *
 * nums[i] is the current value of bunch i. Entries whose file handle is
 * NULL are skipped. The first open bunch becomes the candidate; a later one
 * replaces it whenever isLess(nums[later], nums[candidate], EPS) holds.
 *
 * Returns the index of the chosen bunch, or -1 when no handle is open.
 */
int getMin(double* nums)
{
    int best = -1;
    int idx = 0;

    while (idx < handleCount)
    {
        if (filesHandle[idx] != NULL)
        {
            if (best < 0 || isLess(nums[idx], nums[best], EPS))
            {
                best = idx;
            }
        }
        ++idx;
    }
    return best;
}

/*
 * Merge all recorded sorted temp files into "result.txt".
 *
 * Every bunch file is opened and its first value read. The value picked by
 * getMin is then written to the result (values separated by single spaces)
 * and replaced by the next value from the same file, until every file is
 * exhausted. A file counts as exhausted as soon as no further number can be
 * read from it, whether through end of file or an unparsable token.
 * Afterwards the temp files are removed through clearBunches().
 *
 * When there are more bunches than file-handle slots, or "result.txt"
 * cannot be opened, a message is printed and the function returns without
 * merging; the temp files are left in place in that case.
 */
void mergeBunches()
{
    const int maxHandles = (int)(sizeof(filesHandle) / sizeof(filesHandle[0]));
    // One current value per handle slot; fixed size avoids a zero-length VLA.
    double nums[sizeof(filesHandle) / sizeof(filesHandle[0])];
    int i;

    printf("bunch count = %d\n", bunchCount);
    handleCount = 0;
    if(bunchCount > maxHandles)
    {
        printf("too many bunches to merge: %d (max %d)\n", bunchCount, maxHandles);
        return;
    }

    for(i = 0; i < bunchCount; ++i)
    {
        const char* path = bunches[i];
        printf("open bunch %s\n", path);
        FILE* fp = fopen(path, "r");
        if(fp == NULL)
        {
            printf("open %s failed", path);
            continue;
        }
        filesHandle[handleCount++] = fp;
    }
    printf("handleCount count = %d\n", handleCount);

    // Prime each run with its first value; an empty run is closed right away
    // so that getMin never looks at an unset entry of nums.
    for(i = 0; i < handleCount; ++i)
    {
        if(fscanf(filesHandle[i], "%lf", &nums[i]) != 1)
        {
            fclose(filesHandle[i]);
            filesHandle[i] = NULL;
        }
    }

    FILE* resHandele = fopen("result.txt", "w");
    if(resHandele == NULL)
    {
        printf("open %s failed", "result.txt");
        for(i = 0; i < handleCount; ++i)
        {
            if(filesHandle[i] != NULL)
            {
                fclose(filesHandle[i]);
                filesHandle[i] = NULL;
            }
        }
        return;
    }

    int total = 0;
    int minx;
    while((minx = getMin(nums)) != -1)
    {
        fprintf(resHandele, "%s%lf", (total == 0) ? "" : " ", nums[minx]);
        ++total;

        // Refill from the same run. Relying on the fscanf result rather than
        // feof keeps the last value of a file that lacks trailing whitespace
        // and stops on a malformed token instead of emitting the same value
        // forever.
        if(fscanf(filesHandle[minx], "%lf", &nums[minx]) != 1)
        {
            fclose(filesHandle[minx]);
            filesHandle[minx] = NULL;
        }
    }
    printf("sorted nums total = %d\n", total);
    if(fclose(resHandele) != 0)
    {
        printf("write %s failed\n", "result.txt");
    }
    clearBunches();
}


/*
 * Demo driver: generate n test files, split each of them into sorted temp
 * files, then merge all temp files into the final result.
 */
int main()
{
    puts("Hello world!");
    //testStack(0);
    // test data generate
    int n = 1;
    int maxData = 10000;
    int total = 201100;
    char src[100];
    int i;
    for(i = 0; i < n; ++i)
    {
        // snprintf bounds the write by the size of src itself and always
        // NUL-terminates, so no separate clearing of the buffer is needed
        snprintf(src, sizeof(src), "test_data_%d.txt", i);
        generateData(src, maxData, total);
    }
    // split
    for(i = 0; i < n; ++i)
    {
        snprintf(src, sizeof(src), "test_data_%d.txt", i);
        splitBunch(src);
    }
    // merge
    mergeBunches();
    puts("Hello world!");
    return 0;
}
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Leo Bliss

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值