bitmap位图方案解决海量数据文件排序的问题

参考:http://blog.csdn.net/v_JULY_v/article/details/6451990

生成包含10^7个数据的文件data.txt的方法:

http://blog.csdn.net/ldanduo/article/details/8050484

//copyright@ yansha July、2010.05.30。 
//位图方案解决10^7个数据量的文件的排序问题  
//如果有重复的数据,则只会输出其中一个,其余的将被忽略
//date:2012.10.8

#include<assert.h>
#include<stdio.h>
#include<time.h>
#include<bitset>
#include<fstream>
#include<iostream>
using namespace std;

// Number of distinct values covered by one pass over the input, i.e. the
// number of bits held in memory at a time.
const int max_each_scan = 5000000;

// Reads whitespace-separated integers from `in` until no further integer can
// be parsed, and sets bit (v - base) in `bits` for every value v with
// base <= v < base + max_each_scan. `bits` is cleared first.
// Returns true if reading stopped at end-of-file, false if it stopped early
// (a token that is not an integer, or a read error).
static bool mark_range(FILE *in, int base, std::bitset<max_each_scan> &bits)
{
	bits.reset();
	int num;
	// Compare against 1 rather than EOF: on a non-numeric token fscanf
	// returns 0 without consuming it, which would otherwise loop forever.
	while (fscanf(in, "%d ", &num) == 1)
	{
		// Values outside this pass's window (including negatives) are
		// skipped; bitset::set throws std::out_of_range on a bad index.
		if (num >= base && num - base < max_each_scan)
			bits.set(num - base);
	}
	return feof(in) != 0;
}

// Writes base + i, followed by a space, to `out` for every set bit i of
// `bits`, in increasing order of i.
static void write_range(FILE *out, int base, const std::bitset<max_each_scan> &bits)
{
	for (int i = 0; i < max_each_scan; i++)
	{
		if (bits[i])
			fprintf(out, "%d ", base + i);
	}
}

// Sorts the integers in data.txt into sort.txt using a bitmap.
//
// The value range [0, 2 * max_each_scan) is processed in two passes of
// max_each_scan values each, so only max_each_scan bits are held at once.
// Each pass re-reads the whole input, marks the values that fall in that
// pass's window, then appends the marked values to the output in ascending
// order. Because one bit records a value, duplicates are written once;
// values outside the overall range are ignored.
//
// Prints the elapsed CPU time to stdout. Returns 0 on success, 1 if a file
// cannot be opened, rewound, fully read, or closed after writing.
int main(int argc, char *argv[])
{
	const clock_t begin = clock();

	// Static storage: the bitmap is about 625 KB, which is uncomfortably
	// close to common default stack sizes for an automatic variable.
	static std::bitset<max_each_scan> bit_map;

	// open the file with the unsorted data
	FILE *fp_unsort_file = fopen("data.txt", "r");
	if (!fp_unsort_file)
	{
		std::cerr << "cannot open data.txt for reading" << std::endl;
		return 1;
	}

	FILE *fp_sort_file = fopen("sort.txt", "w");
	if (!fp_sort_file)
	{
		std::cerr << "cannot open sort.txt for writing" << std::endl;
		fclose(fp_unsort_file);
		return 1;
	}

	const int pass_count = 2;
	bool ok = true;
	for (int pass = 0; pass < pass_count; pass++)
	{
		// Every pass after the first starts again from the top of the input.
		if (pass > 0 && fseek(fp_unsort_file, 0, SEEK_SET) != 0)
		{
			std::cout << "fseek failed!" << std::endl;
			ok = false;
			break;
		}

		const int base = pass * max_each_scan;
		if (!mark_range(fp_unsort_file, base, bit_map))
		{
			std::cerr << "data.txt: reading stopped before end of file" << std::endl;
			ok = false;
			break;
		}

		// write the sorted data of this window into the output file
		write_range(fp_sort_file, base, bit_map);
	}

	const clock_t end = clock();
	std::cout << "用位图的方法,耗时:" << std::endl;
	// CLOCKS_PER_SEC is the standard divisor for clock(); divide in floating
	// point so sub-second run times are not truncated to 0.
	std::cout << static_cast<double>(end - begin) / CLOCKS_PER_SEC << "s" << std::endl;

	// fclose flushes buffered output, so a failure here means lost data.
	if (fclose(fp_sort_file) != 0)
	{
		std::cerr << "error while writing sort.txt" << std::endl;
		ok = false;
	}
	fclose(fp_unsort_file);
	return ok ? 0 : 1;
}



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值