外排序模拟(含缓冲区和归并路数)

External Sorting

利用输者树生成初始顺串(用文件模拟磁盘),再对这些顺串做 k 路归并。其中输入缓冲区大小、输出缓冲区大小和归并路数均可自行设置。
输者树参考:败方树(输者树)的建立

#include<stdlib.h>
#include<time.h>
#include<fstream>
#include<iostream>
#include<limits>
#include<queue>
#include<string>
#include<vector>
using namespace std;

// Running total of simulated disk accesses; incremented by the sorting code on
// every buffer refill / flush and reported at the end of main().
int DISK_NUM = 0;
// Sentinel meaning "no value": pads partially filled buffers and marks exhausted
// players in the loser tree. It must compare greater than every real key, so it is
// the largest int. The previous value 0x3f3f3f was smaller than numbers rand() can
// return on platforms where RAND_MAX is 2^31-1, which made such keys sort *after*
// the sentinel and get dropped during merging.
// NOTE: a real key equal to INF is still indistinguishable from the sentinel.
const int INF = std::numeric_limits<int>::max();
// A key tagged with the number of the run it belongs to (replacement selection).
// Ordering is by run number first and by value second, so every key of the current
// run beats every key that had to be deferred to a later run.
struct Sequece {
	int val; // the key itself, or INF for an empty slot
	int num; // run number the key belongs to, or INF for an empty slot

	// True when *this sorts before or equal to `a` in (num, val) order.
	bool operator<= (const Sequece &a) const {
		if (num != a.num)
			return num <= a.num;
		return val <= a.val;
	}
	// True when *this sorts strictly after `a` in (num, val) order.
	bool operator> (const Sequece &a) const {
		if (num != a.num)
			return num > a.num;
		return val > a.val;
	}
	// A default-constructed entry is an empty slot: it loses against every real key.
	Sequece() : val(INF), num(INF) {}
};
// One sorted run stored in a file, together with the in-memory input buffer used
// while merging it. `data` stands in for the on-disk contents; initBuffer() models a
// single disk read that moves the next chunk of `data` into `buffer`.
struct File {
	std::string filename;    // name of the file backing this run
	std::queue<int> data;    // every value of the file that has not been buffered yet
	int fileLength = 0;      // number of values the file held when initData() ran
	int bufferIndex = 0;     // index of the next unread slot in `buffer`
	// Input buffer owned by this run. A vector (instead of a raw `new int[]`) means a
	// refill no longer leaks the previous allocation and copies of File do not share
	// storage; `buffer[i]` access is unchanged for callers.
	std::vector<int> buffer;

	// Reads every integer of `filename` into `data` and records the count in
	// `fileLength`. A file that cannot be opened simply yields no values.
	void initData() {
		std::ifstream in(filename);
		int value;
		while (in >> value)
			data.push(value);
		fileLength = static_cast<int>(data.size());
	}

	// Refills the buffer with up to `sizePerBuffer` values taken from the front of
	// `data`, pads the remaining slots with INF and rewinds `bufferIndex` to 0.
	// A non-positive size produces an empty buffer.
	void initBuffer(int sizePerBuffer) {
		const int slots = sizePerBuffer > 0 ? sizePerBuffer : 0;
		buffer.assign(static_cast<std::size_t>(slots), INF);
		bufferIndex = 0;
		for (int i = 0; i < slots && !data.empty(); ++i) {
			buffer[i] = data.front();
			data.pop();
		}
	}

	// Orders files by length.
	// NOTE(review): std::priority_queue is a max-heap, so with this ordering top() is
	// the LONGEST file; an optimal merge pattern would pick the shortest files first.
	// Left unchanged because flipping it alters which runs get merged together.
	friend bool operator< (const File &a, const File &b) {
		return a.fileLength < b.fileLength;
	}
};


// Loser tree (tournament tree that stores the loser of every match) over an external,
// 1-based array of players. T must provide operator<= and operator>.
//
// The tree does not own the players: initialize() keeps the pointer it is given and
// replay() writes through it, so the array must outlive the tree. Bookkeeping arrays
// are std::vector, so nothing leaks and the tree can be re-initialized with a different
// player count than the one passed to the constructor.
template<class T>
class loserTree
{
private:
	std::vector<int> tree;  // tree[0] = overall winner, tree[1..n-1] = loser of each match
	T *player = nullptr;    // external nodes, valid indices 1..numOfPlayer (not owned)
	std::vector<int> temp;  // winner of each match, used while (re)playing
	int numOfPlayer = 0;    // number of external nodes

	int lowExt = 0;         // number of external nodes on the lowest level
	int offset = 0;         // 2*s - 1, where s is the index of the leftmost lowest-level internal node

	// Index of the smaller of the two players (x wins ties).
	int winner(int x, int y) const { return player[x] <= player[y] ? x : y; }
	// Index of the larger of the two players (y loses ties).
	int loser(int x, int y) const { return player[x] > player[y] ? x : y; }
	void play(int gamepoint, int leftPlayer, int rightPlayer);

public:
	// Reserves bookkeeping space for n players. The tree is not usable until
	// initialize() has been called; any n (including n < 2) is accepted.
	loserTree(int n) {
		numOfPlayer = n > 0 ? n : 0;
		tree.assign(static_cast<std::size_t>(numOfPlayer) + 1, 0);
		temp.assign(static_cast<std::size_t>(numOfPlayer) + 1, 0);
	}

	// Debug dump: the (val, num) of the player stored at every tree node, the raw
	// tree array, then every player's val. Only valid for T with `val` and `num`.
	void output() {
		std::cout << "test:" << '\n';
		if (player == nullptr) { // nothing to show before initialize()
			std::cout.flush();
			return;
		}
		for (int i = 0; i < numOfPlayer; i++)
		{
			std::cout << player[tree[i]].val << ":" << player[tree[i]].num << "  ";
		}
		std::cout << '\n';
		for (int i = 0; i < numOfPlayer; i++)
			std::cout << tree[i] << " ";
		std::cout << '\n';
		for (int i = 1; i <= numOfPlayer; i++)
			std::cout << player[i].val << " ";
		std::cout << std::endl;
	}

	void initialize(T *thePlayers, int n);
	void replay(int thePlayer, T value);
	// Index (1-based) of the current overall winner; 0 when the tree has no players.
	int theWinner() const { return tree[0]; }

};

// Builds the tree over thePlayers[1..n] by playing every match once.
// n == 1 yields a tree whose winner is player 1; n <= 0 (or a null array) yields an
// empty tree whose theWinner() is 0.
template<class T>
void loserTree<T>::initialize(T *thePlayers, int n)
{
	player = thePlayers;
	numOfPlayer = (thePlayers != nullptr && n > 0) ? n : 0;
	// Re-size here so a player count larger than the constructor's cannot overflow.
	tree.assign(static_cast<std::size_t>(numOfPlayer) + 1, 0);
	temp.assign(static_cast<std::size_t>(numOfPlayer) + 1, 0);
	lowExt = 0;
	offset = 0;

	if (numOfPlayer == 0)
		return;
	if (numOfPlayer == 1) {
		// No matches to play: the only player wins. temp[1] is what replay() copies
		// into tree[0], so keep it in sync.
		temp[1] = 1;
		tree[0] = 1;
		return;
	}
	n = numOfPlayer;

	int i, s; // s = index of the leftmost internal node on the lowest level: 2^floor(log2(n-1))
	for (s = 1; 2 * s <= n - 1; s *= 2) ;
	lowExt = 2 * (n - s); // external nodes hanging off the lowest level
	offset = 2 * s - 1;   // node count of the full binary tree above them

	// Matches between lowest-level external nodes.
	for (i = 2; i <= lowExt; i += 2)
		play((i + offset) / 2, i - 1, i);

	// Remaining external nodes.
	if (n % 2 == 1) {
		// Odd n: one match pairs an internal winner with an external node.
		play(n / 2, temp[n - 1], lowExt + 1);
		i = lowExt + 3;
	}
	else
		i = lowExt + 2;

	// i is now the right-hand player of the leftmost unplayed pair.
	for (; i <= n; i += 2)
		play((i - lowExt + n - 1) / 2, i - 1, i);

	// Record the overall winner.
	tree[0] = temp[1];
}

// Plays the match at internal node p between players `left` and `right`, then keeps
// playing upwards for as long as p is a right child (its sibling's winner is known).
template<class T>
void loserTree<T>::play(int p, int left, int right)
{
	tree[p] = loser(left, right);  // the node stores the loser
	temp[p] = winner(left, right); // the winner moves on

	while (p % 2 == 1 && p > 1)
	{
		tree[p / 2] = loser(temp[p - 1], temp[p]);
		temp[p / 2] = winner(temp[p - 1], temp[p]);
		p /= 2; // move up
	}
}

// Replaces the value of player `thePlayer` and replays its path to the root.
// The shortcut of comparing only against the stored losers is valid when `thePlayer`
// is the current overall winner, which is how callers use it. Indices outside
// 1..numOfPlayer are ignored.
template<class T>
void loserTree<T>::replay(int thePlayer, T newValue)
{
	if (player == nullptr || thePlayer < 1 || thePlayer > numOfPlayer)
		return;

	player[thePlayer] = newValue;

	// First match node on the path from this external node to the root.
	int matchPoint;
	if (thePlayer <= lowExt) // lowest level
		matchPoint = (thePlayer + offset) / 2;
	else
		matchPoint = (thePlayer - lowExt + numOfPlayer - 1) / 2;

	// Replay every match on the path: the stored loser meets the ascending winner.
	for (; matchPoint >= 1; matchPoint /= 2)
	{
		int loserTemp = loser(tree[matchPoint], thePlayer);
		temp[matchPoint] = winner(tree[matchPoint], thePlayer);
		tree[matchPoint] = loserTemp;
		thePlayer = temp[matchPoint];
	}

	tree[0] = temp[1];
}


// Writes `count` pseudo-random non-negative integers, each followed by one space,
// to `path` (truncating any existing file). The defaults reproduce the original
// behaviour: 20 values in "testSpace.txt".
// The generator is re-seeded from the wall clock on every call, so two calls within
// the same second produce the same sequence.
// If the file cannot be opened a message is written to std::cerr and nothing else
// happens.
void randomGenerateFile(int count = 20, const std::string &path = "testSpace.txt") {
	std::ofstream out(path, std::ios::out);
	if (!out) {
		std::cerr << "[ERROR] cannot open " << path << " for writing" << std::endl;
		return;
	}
	srand(static_cast<unsigned>(time(nullptr)));
	for (int i = 0; i < count; i++)
		out << rand() << " ";
}


int main(void) {
	randomGenerateFile();

	//50000 小数据20->35
	int Space = 35; //内存总大小
	int kx = 1, inputBuffer = 20000, outputBuffer = 15000;
	int k = 60; //设置归并路数

	cout << "输入缓冲区大小: ";
	cin >> inputBuffer;
	cout << "输出缓冲区大小: ";
	cin >> outputBuffer;
	cout << "归并路数: ";
	cin >> k;

	//输入输出的缓冲区大小
	int freeSpace = Space - (kx * inputBuffer + outputBuffer); //剩余的缓冲区总大小(用于排序)
	int inputBufferAll = kx * inputBuffer;
	//开辟各部分空间
	int *input = new int[inputBufferAll];
	int *output = new int[outputBuffer];
	Sequece *free = new Sequece[freeSpace + 1]; //1表示下标从1开始





	//从文件中读入数据,生成顺串文件
	ifstream allPlayerFile("testSpace.txt"); //绑定所有选手的文件
	int sequeceNum = 1;

	//先初始化
//=======================//
	DISK_NUM++;
//=======================//
	for (int i = 0; i<inputBufferAll; i++) {
		//向输入缓冲区填充数据
		bool flag = true;
		if (allPlayerFile >> input[i])
			flag = false;
		if(flag) input[i] = INF;
	}
	int inputIndex = 0;
	for (int i = 1; i <= freeSpace; i++) {
		free[i].val = input[inputIndex++]; //初始添加至外部节点
		if (free[i].val == INF) free[i].num = INF;
		else free[i].num = 1;
	}
	loserTree<Sequece> generateSequece(freeSpace);
	generateSequece.initialize(free, freeSpace);

	//处理输出缓冲区
	int outputIndex = 0;
	while (true) {
		int wi = generateSequece.theWinner(); //取出一个赢家
		//输出缓冲区满,则刷新输出缓冲区
		if (outputIndex >= outputBuffer) {
		//=======================//
			DISK_NUM++;
		//=======================//
			string ofn = "inputSequece_";
			string bfn = ".txt";
			ofstream outputFile;
			outputFile.open(ofn + to_string(sequeceNum) + bfn, ios::out | ios::app);
			for (int i = 0; i < outputIndex; i++)
				outputFile << output[i] << " ";
			outputIndex = 0;
			if (free[wi].num != sequeceNum) sequeceNum++; //刷新缓冲区时序号不同,总串数+1
			outputFile.close();
		}

		//刷新输入缓冲区
		if (free[wi].val == INF) {
			//清空输出缓冲区
			if (outputIndex != 0) {
			//=======================//
				DISK_NUM++;
			//=======================//
				string ofn = "inputSequece_";
				string bfn = ".txt";
				ofstream outputFile;
				outputFile.open(ofn + to_string(sequeceNum) + bfn, ios::out | ios::app);
				for (int i = 0; i < outputIndex; i++)
					outputFile << output[i] << " ";
				outputIndex = 0;
				sequeceNum++;
				outputFile.close();
			}

			input[0] = INF;
			if (allPlayerFile >> input[0]) ;
			if (input[0] == INF) {
				allPlayerFile.close();
				break; //文件输入结束
			}
			
			//向输入缓冲区填充数据
		//=======================//
			DISK_NUM++;
		//=======================//
			for (int i = 1; i < inputBufferAll; i++) {
				
				bool flag = true;
				if (allPlayerFile >> input[i])
					flag = false;
				if (flag) input[i] = INF;
			}
			inputIndex = 0;
			for (int i = 1; i <= freeSpace; i++) {
				free[i].val = input[inputIndex++]; //初始添加至外部节点
				if (free[i].val == INF) free[i].num = INF;
				else free[i].num = sequeceNum;
			}
			generateSequece.initialize(free, freeSpace);
			continue; //千万注意不能少!!!
		}

		//判断是否输出顺串文件
		if (free[wi].num != sequeceNum) {
		//=======================//
			DISK_NUM++;
		//=======================//
			string ofn = "inputSequece_";
			string bfn = ".txt";
			ofstream outputFile;
			outputFile.open(ofn + to_string(sequeceNum) + bfn, ios::out | ios::app);
			for (int i = 0; i < outputIndex; i++)
				outputFile << output[i] << " ";
			outputIndex = 0;
			sequeceNum++;
			outputFile.close();
		}
		output[outputIndex++] = free[wi].val;

		//向缓冲区取下一个替换的玩家
		Sequece temp_insert;
		if (inputIndex >= inputBufferAll) {
			temp_insert.val = INF;
			temp_insert.num = INF;
		}
		else {
			temp_insert.val = input[inputIndex];
			if (input[inputIndex] >= free[wi].val) {
				if(input[inputIndex]==INF) temp_insert.num = INF;
				else temp_insert.num = free[wi].num;
			}
			else
				temp_insert.num = free[wi].num + 1;
			inputIndex++;
		}
		generateSequece.replay(wi, temp_insert);
	}
	




	//开始k路归并
	
	int inputBufferPerSize = inputBufferAll / 5;
	int outputBufferSize = Space - k - inputBufferPerSize * 5;
	int *merge_outputBuffer = new int[outputBufferSize];

	priority_queue<File> allFilePQ; //记录当前可作为归并排序输入的文件名
	string ifn = "inputSequece_";
	string bfn = ".txt";
	//初始化当前顺串们的所有文件到优先队列中
	for (int i = 1; i <= sequeceNum - 1; i++) {
		string fn = ifn + to_string(i) + bfn;
		File temp_file;
		temp_file.filename = fn;
		temp_file.initData();
	//=======================//
		DISK_NUM++;
	//=======================//
		temp_file.initBuffer(inputBufferPerSize);
		allFilePQ.push(temp_file);
	}


	int outputSeqNum = 1; //当前输出顺串编号
	while (allFilePQ.size() != 1) {
		File *processFiles = new File[k + 1]; //正在处理的文件
		int fileNumK = 0; //最大值为k(最大为k路归并)
		for (int i = 1; i <= k; i++) {
			if (allFilePQ.size() >= 1) {
				processFiles[i] = allFilePQ.top();
				allFilePQ.pop();
				fileNumK = i;
			}
			else break; //优先队列中的文件都已经遍历了
		}

		loserTree<int> merge(fileNumK); //k路归并的输者树
		int *mergeFree = new int[fileNumK + 1]; //归并时的外部节点,下标从1开始
		//初始化外部节点(全为新文件,即不需要刷新缓冲区)
		for (int i = 1; i <= fileNumK; i++) {
			mergeFree[i] = processFiles[i].buffer[0];
			processFiles[i].bufferIndex++; //索引前进
		}
		merge.initialize(mergeFree, fileNumK);


		//正式开始生成输出的顺串
		int outputBufferIndex = 0; //输出缓冲区的索引
		string ofn = "outputSequece_" + to_string(outputSeqNum) + bfn; //预备输出的文件名
		ofstream outputfff(ofn);
		while (true) {
			int wi = merge.theWinner();

			//输出缓冲区满,刷新
			if (outputBufferIndex >= outputBufferSize) {
			//=======================//
				DISK_NUM++;
			//=======================//
				for (int i = 0; i < outputBufferSize; i++)
					outputfff << merge_outputBuffer[i] << " ";
				outputBufferIndex = 0;
			}

			//归并完成
			if (mergeFree[wi] == INF) {
				if (outputBufferIndex != 0) {
				//=======================//
					DISK_NUM++;
				//=======================//
					for (int i = 0; i < outputBufferIndex; i++)
						outputfff << merge_outputBuffer[i] << " ";
					outputBufferIndex = 0;
				}
				break;
			}

			//赢家写入输出缓冲区
			merge_outputBuffer[outputBufferIndex++] = mergeFree[wi];

			//从缓冲区取下一个元素
			int i_t = processFiles[wi].bufferIndex;
			if (i_t >= inputBufferPerSize) {
				//单个缓冲区刷新
			//=======================//
				DISK_NUM++;
			//=======================//
				processFiles[wi].initBuffer(inputBufferPerSize);
				if (processFiles[wi].buffer[0] != INF) {
					mergeFree[wi] = processFiles[wi].buffer[0];
					processFiles[wi].bufferIndex++;
				}
				else
					mergeFree[wi] = INF;
			}
			else {
				mergeFree[wi] = processFiles[wi].buffer[i_t];
				processFiles[wi].bufferIndex++;
			}
			merge.replay(wi, mergeFree[wi]);
		}
		outputfff.close();
		

		//一次归并完成,将新文件压入优先队列
		outputSeqNum++;
		ifstream inputfff(ofn);
		File temp_file;
		temp_file.filename = ofn;
		temp_file.initData();
		temp_file.initBuffer(inputBufferPerSize);
		allFilePQ.push(temp_file);
	}

	string last = allFilePQ.top().filename;
	cout << "[EXTERNAL SORT SUCCESSFULLY] in file : " << last << endl;
	cout << "[DISK VISIT] totally: " << DISK_NUM << endl;



	system("pause");
	return 0;
}
  • 0
    点赞
  • 22
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值