这是一个具有5个缓冲区的败者树
每个内部节点保存其两棵子树的胜者相互比较后的败者,胜者继续向上参加比较,最终的胜者存放在 loser_tree[0] 中。
当某个缓冲区读入新数据需要调整时,只需沿着该叶子到根的路径,依次与各父节点中保存的败者比较即可。
/**
 * Replays the matches on the path from leaf s up to the root of the loser tree.
 * At each internal node the loser is left in the node and the winner moves on;
 * whichever index is still travelling at the end is stored in loser_tree[0].
 * @param s index of the buffer (leaf) whose record has changed
 */
void adjust(int s) {
    // Leaf s is treated as tree position s + K, so its parent is (s + K) / 2.
    size_t node = (s + K) / 2;
    // Stop at the root, or as soon as the travelling index is the -1 "empty" marker.
    while (node > 0 && s != -1) {
        const bool slot_empty = (loser_tree[node] == -1);
        // An empty slot simply takes s; otherwise the larger record loses and stays here.
        if (slot_empty || external[s] > external[loser_tree[node]]) {
            swap(s, loser_tree[node]);
        }
        node /= 2;
    }
    loser_tree[0] = s;
}
/**
 * Builds the loser tree from the records currently held in the K buffers.
 */
void create_loser_tree() {
    // Mark every node as empty: memset writes the byte 0xFF everywhere,
    // which reads back as -1 for each int (two's complement).
    memset(loser_tree, -1, K * sizeof(int));
    // Enter the leaves one at a time, from the last buffer down to the first.
    int leaf = K - 1;
    while (leaf >= 0) {
        adjust(leaf);
        --leaf;
    }
}
K路归并
#include <algorithm>
#include <climits>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include <windows.h>
#include <io.h>
#include <direct.h>
using namespace std;
// Number of records main() asks ExternalSort to hold in memory at once (one million).
const size_t MEMORY_SIZE = 1000000;
template<typename T>
class ExternalSort {
private:
string input_file; //输入文件
string output_file; //输出文件
string merge_directory; //临时存放文件夹
size_t MEMORY_SIZE; //K路归并
size_t K; //分割文件
int* loser_tree; //败者树
T* external; //k个缓冲区
T T_MAX; //最大值
/**
* 创建目录,如果存在则更换名字继续尝试
* @param folderPath 目录名字
* @return 创建是否成功
*/
bool mkdir(string& folderPath) {
while (_access(folderPath.c_str(), 0) != -1) {
folderPath += "_(1)";
}
return 0 == _mkdir(folderPath.c_str());
}
/**
* 从文件中读取数据到数组中
* @param temp 数据
* @param fin 文件
* @return 读到数量
*/
size_t read_data(T temp[], ifstream& fin) {
size_t i = 0;
while (i<MEMORY_SIZE && fin >> temp[i]) {
++i;
}
return i;
}
/**
* 将数据写入文件夹中
* @param temp 数据
* @param path 路径
* @param size 数据个数
*/
void write_data(T temp[], const string& path, const size_t& size) {
ofstream fout(path);
if (!fout) {
exit(-1);
}
for (size_t i = 0; i < size; ++i) {
fout << temp[i] << endl;
}
fout.close();
}
/**
* 内部排序
*/
void memory_sort() {
ifstream fin(input_file);
if (!fin) {
exit(-1);
}
T* temp = new T[MEMORY_SIZE];
while (true) {
//读取数据
size_t size = read_data(temp, fin);
if (0 == size) {
break;
}
//排序
sort(temp, temp + size);
//写入文件
write_data(temp, merge_directory + "\\" + to_string(K) + ".txt", size);
++K;
}
fin.close();
delete[] temp;
temp = nullptr;
}
/**
* 败者树调整
* @param s 调整位置
*/
void adjust(int s) {
//(s+K)/2为s的双亲
for (size_t t = (s + K) / 2; t > 0; t /= 2) {
if (-1 == s)break;
if (loser_tree[t] == -1 || external[s] > external[loser_tree[t]]) {
swap(s, loser_tree[t]);
}
}
loser_tree[0] = s;
}
/**
* 创建败者树
*/
void create_loser_tree() {
memset(loser_tree, -1, sizeof(int) * K);
for (int i = K - 1; i >= 0; --i) {
adjust(i);
}
}
/**
* K路归并
*/
void k_merge() {
auto* buf = new ifstream[K];
//从K个缓冲区依次读取一个数据
for (size_t i = 0; i < K; ++i) {
buf[i].open(merge_directory + "\\" + to_string(i) + ".txt");
if (!buf[i]) {
exit(-1);
}
buf[i] >> external[i];
}
create_loser_tree();
ofstream fout(output_file);
if (!fout) {
exit(-1);
}
while (external[loser_tree[0]] != T_MAX) {
int id = loser_tree[0];
fout << external[id] << endl;
if (buf[id] >> external[id]) {
}
else { //读到文件末尾了,用最大值来顶替
external[id] = T_MAX;
}
adjust(id);
}
fout.close();
for (size_t i = 0; i < K; ++i) {
buf[i].close();
}
delete[] buf;
buf = nullptr;
}
/**
* 删除目录
* @param dirPath 目录
*/
void removeDir(const string& dirPath) {
struct _finddata_t fb; //find the storage structure of the same properties file.
string path;
long handle;
int resultone;
int noFile; // the tag for the system's hidden files
noFile = 0;
handle = 0;
path = dirPath + "/*";
handle = _findfirst(path.c_str(), &fb);
//find the first matching file
if (handle != -1) {
//find next matching file
while (0 == _findnext(handle, &fb)) {
// "." and ".." are not processed
noFile = strcmp(fb.name, "..");
if (0 != noFile) {
path = dirPath + "/" + fb.name;
//fb.attrib == 16 means folder
if (fb.attrib == 16) {
removeDir(path);
//if (fb.size == 0) {
// _rmdir(path.c_str());
//}
}
else {
//not folder, delete it. if empty folder, using _rmdir istead.
remove(path.c_str());
}
}
}
// close the folder and delete it only if it is closed. For standard c, using closedir instead(findclose -> closedir).
// when Handle is created, it should be closed at last.
_findclose(handle);
_rmdir(dirPath.c_str());
}
}
public:
ExternalSort(string input_file, string output_file, const size_t& MEMORY_SIZE, const T& MAX) :
input_file(std::move(input_file)),
output_file(std::move(output_file)),
MEMORY_SIZE(MEMORY_SIZE),
merge_directory("temp"),
K(0),
T_MAX(MAX),
external(nullptr),
loser_tree(nullptr) {
//创建文件夹
mkdir(merge_directory);
}
~ExternalSort() {
if (loser_tree != nullptr) {
delete[] loser_tree;
loser_tree = nullptr;
}
if (external != nullptr) {
delete[] external;
external = nullptr;
}
removeDir(merge_directory);
}
/**
* 外部排序
*/
void external_sort() {
memory_sort();
loser_tree = new int[K];
external = new T[K];
k_merge();
}
};
int main() {
LARGE_INTEGER m_nFreq;
LARGE_INTEGER m_nBeginTime;
LARGE_INTEGER nEndTime;
QueryPerformanceFrequency(&m_nFreq); // 获取时钟周期
QueryPerformanceCounter(&m_nBeginTime); // 获取时钟计数
ExternalSort<int> s("E:\\C_plus_plus_Demo\\input.txt", "E:\\C_plus_plus_Demo\\output.txt", MEMORY_SIZE, INT_MAX);
s.external_sort();
QueryPerformanceCounter(&nEndTime);
std::cout << (double)(nEndTime.QuadPart - m_nBeginTime.QuadPart) * 1000 / m_nFreq.QuadPart << "ms" << std::endl;
return 0;
}