外排序

当你只有20M的内存,却想对200M的数据进行排序的时候,就需要用到外排序。
参考自:http://www.cnblogs.com/benjamin-t/p/3325401.html
因为看不懂败者树,所以我用堆排序来做
1.不要使用 ifstream::eof() 来判断输入流结尾,这是个坑:eof() 只有在一次读取失败之后才会变为 true,应该直接用 in >> x 的结果作为循环条件。
2.建堆操作(make_heap)是O(n),而调整堆顶只需要O(logn)。能调整的时候务必只做调整,不要重新建堆,对效率的影响非常大。
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <functional>
#include <iostream>
#include <random>
#include <utility>
#include <vector>
using namespace std;
#define MEMORY 20000  // capacity of the in-memory sort buffer, counted in ints
#define MAXNUM 100000  // how many numbers the generated test data set contains
// Workspace used by generate_runs() as the heap for building sorted runs.
int buffer[MEMORY];
// Holds the full data set while generate_num() writes the input and reference files.
int a[MAXNUM];
// Sift-down step for an array-backed binary min-heap.
//   a    - heap storage (children of index k live at 2k+1 and 2k+2)
//   hole - index of the element that may be larger than its children
//   size - number of live elements in a
// The element at hole is swapped downwards with its smaller child until
// neither child is smaller or the bottom of the heap is reached.
void adjust_heap(int a[],int hole,int size)
{
	for(;;)
	{
		if(hole >= size)
			return;

		const int left  = 2 * hole + 1;
		const int right = left + 1;

		// Pick the smallest among the node and whichever children exist.
		int smallest = hole;
		if(left < size && a[left] < a[smallest])
			smallest = left;
		if(right < size && a[right] < a[smallest])
			smallest = right;

		// Heap property already holds at this node: done.
		if(smallest == hole)
			return;

		const int sinking = a[hole];
		a[hole]     = a[smallest];
		a[smallest] = sinking;
		hole = smallest;
	}
}
// Produces the two fixture files used by the program:
//   "data"      - the numbers 1..MAXNUM in random order, one per line
//   "sort_data" - the same numbers in ascending order, one per line
// The global array a is used as scratch space and is left sorted on return.
void generate_num()
{
	for(int i = 0;i < MAXNUM;++i)
	{
		a[i] = i + 1;
	}

	// std::shuffle gives an unbiased permutation. The previous
	// "swap a[i] with a[rand()%MAXNUM]" loop was biased and could never pick
	// an index above RAND_MAX, which may be as small as 32767.
	std::mt19937 engine(static_cast<unsigned>(std::time(nullptr)));
	std::shuffle(a,a + MAXNUM,engine);

	// '\n' instead of endl: no need to flush the stream after every value.
	std::ofstream out("data");
	for(int i = 0;i < MAXNUM;++i)
	{
		out << a[i] << '\n';
	}
	out.close();

	std::sort(a,a + MAXNUM);
	out.open("sort_data");
	for(int i = 0;i < MAXNUM;++i)
	{
		out << a[i] << '\n';
	}
	out.close();
}
// Splits the whitespace-separated integers in data_file into sorted run files
// named "0", "1", "2", ... using replacement selection on the global buffer.
//   data_file - path of the unsorted input
//   returns   - number of run files written (0 if the input is missing or empty)
// Each run file holds one ascending sequence, one value per line.
int generate_runs(const char* data_file)
{
	std::ifstream in(data_file);
	std::ofstream out;
	char filename[20];
	int file_count = 0;

	// Fill the buffer, counting only values that were really extracted, so an
	// input shorter than MEMORY is still sorted instead of being dropped.
	int loaded = 0;
	while(loaded < MEMORY && in >> buffer[loaded])
		++loaded;
	if(loaded == 0)
		return 0;

	// buffer[0, size)      : min-heap feeding the current run
	// buffer[size, loaded) : values set aside for the next run
	int size = loaded;
	while(true)
	{
		std::snprintf(filename,sizeof filename,"%d",file_count++);
		out.open(filename);
		std::make_heap(buffer,buffer + size,std::greater<int>());

		int t;
		while(size > 0 && in >> t)
		{
			out << buffer[0] << '\n';
			if(t < buffer[0])
			{
				// Smaller than the value just written, so it cannot extend
				// this run: shrink the heap and park t behind it.
				buffer[0] = buffer[--size];
				buffer[size] = t;
			}
			else
			{
				// Still fits in the current run: it replaces the root.
				buffer[0] = t;
			}
			// Only the root changed, so one sift-down restores the heap.
			adjust_heap(buffer,0,size);
		}

		if(size == 0)
		{
			// The current run is complete and every slot now holds a value
			// parked for the next run; start over with the whole buffer.
			out.close();
			size = loaded;
			continue;
		}

		// Input exhausted: drain what is left of the current run ...
		int* const parked = buffer + size;
		int parked_count  = loaded - size;
		while(size > 0)
		{
			out << buffer[0] << '\n';
			buffer[0] = buffer[--size];
			adjust_heap(buffer,0,size);
		}
		out.close();

		// ... then write the parked values, if any, as one final run.
		if(parked_count > 0)
		{
			std::snprintf(filename,sizeof filename,"%d",file_count++);
			out.open(filename);
			std::make_heap(parked,parked + parked_count,std::greater<int>());
			while(parked_count > 0)
			{
				out << parked[0] << '\n';
				parked[0] = parked[--parked_count];
				adjust_heap(parked,0,parked_count);
			}
			out.close();
		}
		break;
	}
	return file_count;
}
// K-way merge of the run files "0", "1", ..., "<runs-1>" into out_file.
//   runs     - number of run files to merge; each must hold ascending,
//              whitespace-separated integers
//   out_file - path of the merged output, written one value per line
// Run files that are missing or empty contribute nothing to the output.
void merger_sort(int runs,const char* out_file)
{
	// Each heap entry carries the run it came from, so the source of the
	// minimum is known directly. Looking the source up by value breaks when
	// two runs contain the same number.
	typedef std::pair<int,int> Entry;   // (value, run index)

	std::ofstream out(out_file);
	if(runs <= 0)
		return;

	std::vector<std::ifstream> in(runs);
	std::vector<Entry> heap;
	heap.reserve(runs);

	// Open every run and seed the heap with its first value, if it has one.
	char filename[20];
	for(int i = 0;i < runs;++i)
	{
		std::snprintf(filename,sizeof filename,"%d",i);
		in[i].open(filename);
		int first;
		if(in[i] >> first)
			heap.push_back(Entry(first,i));
	}

	std::greater<Entry> smallest_first;
	std::make_heap(heap.begin(),heap.end(),smallest_first);

	while(!heap.empty())
	{
		// Move the minimum to the back, emit it, then refill that slot from
		// the same run. This costs O(log runs) per value instead of a rebuild.
		std::pop_heap(heap.begin(),heap.end(),smallest_first);
		Entry& smallest = heap.back();
		out << smallest.first << '\n';

		int next;
		if(in[smallest.second] >> next)
		{
			smallest.first = next;
			std::push_heap(heap.begin(),heap.end(),smallest_first);
		}
		else
		{
			// This run is exhausted; drop its slot.
			heap.pop_back();
		}
	}
	// Streams close themselves when they go out of scope.
}


// Verifies the sort result by comparing the files "merge" and "sort_data"
// value by value.
//   returns - true only if both files can be opened and contain the same
//             sequence of integers with the same length
bool check()
{
	std::ifstream merged("merge");
	std::ifstream expected("sort_data");
	if(!merged || !expected)
		return false;

	int a,b;
	while(true)
	{
		// Read from both sides every round so that a file which is merely a
		// prefix of the other is reported as a mismatch.
		const bool got_a = static_cast<bool>(merged >> a);
		const bool got_b = static_cast<bool>(expected >> b);
		if(!got_a || !got_b)
			return got_a == got_b;
		if(a != b)
			return false;
	}
}
// Program entry point: generates the test data, runs the external sort
// (run generation followed by the merge), prints the wall-clock seconds each
// phase took, and finally prints whether the merged output matches the
// reference file.
int main()
{
	// Phase 1: create "data" and "sort_data".
	time_t started = time(NULL);
	cout << "开始产生随机数" << endl;
	generate_num();
	time_t elapsed = time(NULL) - started;
	cout << "随机数产生完毕,花费时间 " << elapsed << endl;

	// Phase 2: split "data" into sorted runs, then merge them into "merge".
	cout << "归并排序开始" << endl;
	started = time(NULL);
	const int run_count = generate_runs("data");
	merger_sort(run_count,"merge");
	elapsed = time(NULL) - started;
	cout << "归并排序完毕,花费时间 " << elapsed << endl;

	// Phase 3: compare "merge" with "sort_data".
	const bool sorted_ok = check();
	const char* verdict = sorted_ok ? "正确" : "错误";
	cout << "检查结果为" << verdict << endl;
	return 0;
}


                
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值