多路归并外排序(C++实现,VS2022)

在VS2022中,实现多路归并外排序。

实现程序大致分为以下几个部分:

  1. 声明结构体及相关操作;结构体包含键和数据两部分,可以按键值从小到大排序;
  2. 创建三个列表,写入数据后存储到三个不同的文件中;
  3. 将文件中的数据关键字读取到内存中,完成多路归并排序(直接写入文件)。

实现如下:在VS2022中新建空项目,添加main.cpp文件。输入如下代码:

#include <afx.h>
#include <iostream>
#include <fstream>
#include <vector>
#include <sstream>
#include <direct.h>	//	c++ mkdir所在文件
#include <algorithm>

#pragma pack (push)
#pragma pack (1)
//	数据结构定义
typedef struct MyInfo 
{
	unsigned int key;
	int data;
}MyInfo;
//	数据在文件中的特征
typedef struct MyInfoFeature{
	unsigned int key;	//	关键字
	_int64 pos; //	在文件中位置
	int fileNo;	//	在打开文件列表中的编号
}MyInfoFeature;
#pragma pack (pop)

#define DATA_DIR "d:\\MyInfoData\\"
#define SORTED_DIR "d:\\MySortedData\\"

typedef std::vector<MyInfo> MyInfoVector;			//	信息数组(列表)类.
typedef std::vector<MyInfoVector> MyInfoVectors;	//	信息数组(列表)集合类.

typedef std::vector<MyInfoFeature> MyInfoFeatureVector;			//	信息特征数组(列表)类.
typedef std::vector<MyInfoFeatureVector> MyInfoFeatureVectors;	//	信息特征数组(列表)集合类.

void showMyInfo(MyInfo myInfo)
{
	std::cout << "key: " << myInfo.key << ". data: " << myInfo.data << std::endl;
}
void showMyInfoVector(MyInfoVector& myInfoVector)
{
	for (MyInfoVector::iterator it = myInfoVector.begin(); it != myInfoVector.end(); it++)
		showMyInfo(*it);
}

void showMyInfoFeature(MyInfoFeature f)
{
	std::cout << "key: " << f.key << ". pos: " << f.pos << ". fileNo: " << f.fileNo << std::endl;
}
void showMyInfoFeatureVector(MyInfoFeatureVector& fv)
{
	for (MyInfoFeatureVector::iterator it = fv.begin(); it != fv.end(); it++)
		showMyInfoFeature(*it);
}

//	比较两个特征大小的函数
bool lessThan(MyInfoFeature& f1, MyInfoFeature& f2)
{
	return f1.key < f2.key;
}

//	声明并设置三个信息数组(列表).
MyInfoVector myInfoVector1, myInfoVector2, myInfoVector3;
void setMyInfoVectors()
{
	MyInfo myInfo = {0};
	{//	设置myInfoVector1
		MyInfoVector& myInfoVector = myInfoVector1;
		myInfo.key = 7;		myInfo.data = myInfo.key * 10;		myInfoVector.push_back(myInfo);
		myInfo.key = 4;		myInfo.data = myInfo.key * 10;		myInfoVector.push_back(myInfo);
		myInfo.key = 1;		myInfo.data = myInfo.key * 10;		myInfoVector.push_back(myInfo);
		//	显示设置的内容.	
		std::cout << "myInfoVector1: " << std::endl;
		showMyInfoVector(myInfoVector1);
	}

	{//	设置myInfoVector2
		MyInfoVector& myInfoVector = myInfoVector2;
		myInfo.key = 11;	myInfo.data = myInfo.key * 10;	myInfoVector.push_back(myInfo);
		myInfo.key = 8;		myInfo.data = myInfo.key * 10;	myInfoVector.push_back(myInfo);
		myInfo.key = 5;		myInfo.data = myInfo.key * 10;	myInfoVector.push_back(myInfo);
		myInfo.key = 2;		myInfo.data = myInfo.key * 10;	myInfoVector.push_back(myInfo);
		//	显示设置的内容.	
		std::cout << "myInfoVector2: " << std::endl;
		showMyInfoVector(myInfoVector2);
	}

	{//	设置myInfoVector3
		MyInfoVector& myInfoVector = myInfoVector3;
		myInfo.key = 12;	myInfo.data = myInfo.key * 10;	myInfoVector.push_back(myInfo);
		myInfo.key = 9;		myInfo.data = myInfo.key * 10;	myInfoVector.push_back(myInfo);
		myInfo.key = 6;		myInfo.data = myInfo.key * 10;	myInfoVector.push_back(myInfo);
		myInfo.key = 3;		myInfo.data = myInfo.key * 10;	myInfoVector.push_back(myInfo);
		//	显示设置的内容.	
		std::cout << "myInfoVector3: " << std::endl;
		showMyInfoVector(myInfoVector3);
	}

}
//	将三个信息数组写入不同的文件中.	2022.4.18
int writeMyInfoVectors()
{
	MyInfoVectors myInfoVectors;
	myInfoVectors.push_back(myInfoVector1);
	myInfoVectors.push_back(myInfoVector2);
	myInfoVectors.push_back(myInfoVector3);
	int i = 1;
	std::stringstream ss;
	std::ofstream ofs;	//	输出文件
	std::string fn;	//	文件名
	for (MyInfoVectors::iterator it = myInfoVectors.begin();
		it != myInfoVectors.end(); 
		it++)
	{
		
		ss.str("");	//	清空ss的字符串.
		ss << DATA_DIR << "MyInfoData" << i++ << ".dat";
		fn = ss.str();
		
		ofs.open(fn, std::ios::binary | std::ios::out | std::ios::trunc);	//	创建文件
		if (ofs.bad()) return -1;
		for (MyInfoVector::iterator itV = it->begin(); itV != it->end(); itV++) {
			MyInfo& myInfo = *itV;
			ofs.write((char*)&myInfo, sizeof(myInfo));
		}
		ofs.close();	//	关闭文件
	}
	return 0;
}

std::vector<std::string> openedFileNames;	//	打开文件名列表
std::vector<std::ifstream*> ifsVector;		//	打开输入文件列表
MyInfoFeatureVectors myInfoFeatureVectors;
//	多路归并排序. 
//	将创建的文件中的数据读入myInfoFeatureVectors中. 
//	对各路排序后,进行归并排序.
void mergeSort()
{
	CFileFind finder;
	CString strDir = DATA_DIR;
	for (bool bWorking = finder.FindFile(strDir + "*.dat"); bWorking;) {
		bWorking = finder.FindNextFile();
		std::string fileName = finder.GetFilePath().GetBuffer();
		std::ifstream* ifs = new std::ifstream;
		std::cout  << "正在打开文件" << fileName << std::endl;
		ifs->open(fileName, std::ios::binary | std::ios::in);
		if (ifs->bad()) {
			delete ifs;
			std::cout << fileName << "打开失败" << std::endl;
			continue;
		}else
			std::cout << fileName << "打开成功!" << std::endl;
		openedFileNames.push_back(fileName);
		ifsVector.push_back(ifs);
	}

	MyInfo myInfo;
	MyInfoFeature myInfoFeature;
	MyInfoFeatureVectors myInfoFeatureVectors;
	for (int i = 0; i < ifsVector.size(); i++) {
		MyInfoFeatureVector myInfoFeatureVector;
		std::ifstream& ifs = *ifsVector[i];
		// 获取文件大小
		std::streampos fSize = 0;
		ifs.seekg(0, std::ios::end);
		fSize = ifs.tellg();
		std::cout << "已打开文件" << openedFileNames[i] << "的大小为: " << fSize << "字节." << std::endl;
		// 读取并提取特征数据
		for (ifs.seekg(0, std::ios::beg); ifs.tellg() < fSize; ) {
			myInfoFeature.pos = ifs.tellg(); //	
			myInfoFeature.fileNo = i;
			ifs.read((char*)&myInfo, sizeof(myInfo));	//	读取myInfo
			//	提取特征.
			myInfoFeature.key = myInfo.key;
			//	加入数组中.
			myInfoFeatureVector.push_back(myInfoFeature);
		}
		myInfoFeatureVectors.push_back(myInfoFeatureVector);
		
	}

	//	显示myInfoFeatureVectors的内容
	std::cout << "各路排序前myInfoFeatureVectors的内容" << std::endl;
	for (MyInfoFeatureVectors::iterator it = myInfoFeatureVectors.begin();
		it != myInfoFeatureVectors.end(); it++) {
		showMyInfoFeatureVector(*it);
	}

	//	各路排序
	for (MyInfoFeatureVectors::iterator it = myInfoFeatureVectors.begin();
		it != myInfoFeatureVectors.end(); it++) {
		sort((*it).begin(), (*it).end(), ::lessThan);
	}

	//	显示myInfoFeatureVectors的内容
	std::cout << "各路排序后myInfoFeatureVectors的内容" << std::endl;
	for (MyInfoFeatureVectors::iterator it = myInfoFeatureVectors.begin();
		it != myInfoFeatureVectors.end(); it++) {
		showMyInfoFeatureVector(*it);
	}

	//	多路归并排序
	std::ofstream ofs;	//	输出文件
	std::string fn = SORTED_DIR;	fn += "merge.dat";//	文件名
	ofs.open(fn, std::ios::binary | std::ios::out | std::ios::trunc);	//	创建文件
	if (ofs.bad()) return;
	//	计算元素的总数
	int nEleTotal = 0;
	for (MyInfoFeatureVectors::iterator it = myInfoFeatureVectors.begin();
		it != myInfoFeatureVectors.end(); it++) {
		nEleTotal += it->size();
	}
	//	每个MyInfoFeatureVector的下标
	std::vector<int> indexes;	indexes.resize(myInfoFeatureVectors.size());
	//	排序(核心代码)
	for (int n = 0; n < nEleTotal; n++) {
		unsigned int minKey = -1; //	-1对应最大的无符号整数
		int iMin = 0; //	最小的key所在的数组的编号
		for (int i = 0; i < myInfoFeatureVectors.size(); i++) {
			for (int j = indexes[i]; j < myInfoFeatureVectors[i].size(); j++) {
				if (myInfoFeatureVectors[i][j].key < minKey) {
					minKey = myInfoFeatureVectors[i][j].key;
					iMin = i;
				}
			}
		}
		//	从myInfoFeatureVectors[iMin][indexes[iMin]]对应的文件中读取数据.
		int jTemp = indexes[iMin];
		ifsVector[iMin]->seekg(myInfoFeatureVectors[iMin][jTemp].pos, std::ios::beg);
		ifsVector[iMin]->read((char*)&myInfo, sizeof(myInfo));
		//	将读取的数据写入文件
		ofs.write((char*)&myInfo, sizeof(myInfo));
		//	改变对应的索引值.
		indexes[iMin]++;
	}

	//	关闭文件并删除文件对象
	for (std::vector<std::ifstream*>::iterator it = ifsVector.begin(); it != ifsVector.end(); it++) {
		(*it)->close();
		delete (*it);
	}
	ofs.close();

	//	读取并显示排序后的文件. 检验排序结果.
	fn = SORTED_DIR;	fn += "merge.dat";//	文件名
	std::ifstream ifs;
	ifs.open(fn, std::ios::binary | std::ios::in);
	// 获取文件大小
	std::streampos fSize = 0;
	ifs.seekg(0, std::ios::end);
	fSize = ifs.tellg();
	std::cout << "已打开文件" << fn << "的大小为: " << fSize << "字节." << std::endl;
	// 读取并提取特征数据
	for (ifs.seekg(0, std::ios::beg); ifs.tellg() < fSize; ) {
		ifs.read((char*)&myInfo, sizeof(myInfo));	//	读取myInfo
		showMyInfo(myInfo);
	}
	ifs.close();
}

int main()
{
	//	设置三个信息数组(列表).
	setMyInfoVectors();
	//	创建目录
	_mkdir(DATA_DIR);	
	_mkdir(SORTED_DIR);
	//	将三个信息数组写入不同的文件中.	2022.4.18
	writeMyInfoVectors();
	//	将创建的文件中的数据读入myInfoFeatureVectors中.
	mergeSort();
	return 0;
}

为了使用MFC的类CFileFind,需要包含头文件afx.h,在编译时会出现错误:

fatal error C1189: #error:  Building MFC application with /MD[d] (CRT dll version) requires MFC shared dll version. Please #define _AFXDLL or do not use /MD[d]

 

解决该错误的方法是,在项目的属性页中,将“配置属性-高级-MFC的使用”右侧的选项由“使用标准Windows库”改为“在静态库中使用MFC”或“在共享DLL中使用MFC”,如下图所示。

并将“字符集”改为“使用多字节字符集”。

程序运行结果如下: 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Alexabc3000

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值