#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
/// One heap entry of the k-way merge: a value plus the index of the input
/// stream it was read from.
struct Node
{
    int num;      ///< Value read from the input; the heap is ordered by this field.
    int fileNum;  ///< Index of the originating stream, used by main to refill from the same file.
};
/// Exchanges the contents of two heap entries.
/// @param i first entry; holds j's former value afterwards
/// @param j second entry; holds i's former value afterwards
void swap(Node &i,Node &j)
{
    const Node saved = j;
    j = i;
    i = saved;
}
void fttb_heap(std::vector<Node> &nums,int i,int n)
{
while(1)
{
int maxPos = i;
if(i*2 + 1 <= n && nums[maxPos].num > nums[i*2 + 1].num)
{
maxPos = i*2 + 1;
}
if(i*2 + 2 <= n && nums[maxPos].num > nums[i*2 + 2].num)
{
maxPos = i*2 + 2;
}
if(maxPos == i)
{
break;
}
swap(nums[i],nums[maxPos]);
i = maxPos;
}
}
void bttf_heap(std::vector<Node> &nums,int i)
{
while(1)
{
int maxPos = i;
if((i - 1)/2 >= 0 && nums[maxPos].num < nums[(i - 1)/2].num)
{
maxPos = (i - 1)/2 ;
}
if(maxPos == i)
{
break;
}
swap(nums[i],nums[maxPos]);
i = maxPos;
}
}
/// Turns nums[0..n] into a min-heap ordered by Node::num.
///
/// Every index from (n - 1)/2 down to 0 is sifted down in turn.
/// @param nums entries to heapify in place
/// @param n    index of the last entry (inclusive); a negative n makes this a no-op
void build_heap(std::vector<Node> &nums,int n)
{
    int parent = (n - 1)/2;
    while(parent >= 0)
    {
        fttb_heap(nums,parent,n);
        --parent;
    }
}
void insert_heap(std::vector<Node> &nums,Node& stNode)
{
nums.emplace_back(stNode);
bttf_heap(nums,nums.size() - 1);
}
/// Removes and returns the root (smallest num) of the min-heap.
///
/// Precondition: nums must not be empty; the first element is read unconditionally.
/// @param nums heap to pop from; shrinks by one element
/// @return a copy of the entry that was at the root
Node del_heap(std::vector<Node> &nums)
{
    Node top = nums.front();
    // The old root is dropped by pop_back, so overwriting it with the last
    // entry has the same effect as swapping the two first.
    nums.front() = nums.back();
    nums.pop_back();
    const int last = static_cast<int>(nums.size()) - 1;
    fttb_heap(nums,0,last);
    return top;
}
int main(int argc,char** argv)
{
printf("hello world\n");
std::vector<std::string> vectorFileName;
std::vector<std::ifstream> vectorFileStream;
std::ofstream outFileStream("after_merge_file.txt");
if(argc < 2)
{
printf("usage: %s merge files name list\n",argv[0]);
return -1;
}
for(int i = 1;i < argc;i++)
{
printf("get file name %s\n",argv[i]);
vectorFileName.emplace_back(argv[i]);
std::ifstream file(argv[i]);
vectorFileStream.emplace_back(std::move(file));
}
printf("size %d %d\n",vectorFileStream.size(),vectorFileName.size());
std::vector<Node> nums;
for(int i = 0;i < vectorFileStream.size();i++)
{
if(vectorFileStream[i].good())
{
char buf[128] = {0};
vectorFileStream[i].getline(buf,128);
int num = atoi(buf);
printf("get num %d\n",num);
Node stNode;
stNode.num = num;
stNode.fileNum = i;
nums.emplace_back(stNode);
}
}
printf("befor build heap\n");
for(int i = 0; i < nums.size(); i++)
{
printf("%d\n",nums[i].num);
}
//将数据构建小顶堆
build_heap(nums,nums.size()-1);
printf("after build heap\n");
for(int i = 0; i < nums.size(); i++)
{
printf("[%d]%d\n",nums[i].fileNum,nums[i].num);
}
while(nums.size() != 0)
{
Node stNode = del_heap(nums);
printf("get %d %d\n",stNode.fileNum,stNode.num);
outFileStream<<stNode.num<<"\n";
//由于同个文件里的数据是排好序的,所以该文件的下一个数据可能也是最小的,
//因此需要从栈顶文件的该数据的文件中拿出最新数据插入,再构建小顶堆
if(vectorFileStream[stNode.fileNum].good())
{
char buf[128] = {0};
vectorFileStream[stNode.fileNum].getline(buf,128);
int num = atoi(buf);
printf("[%d] get num %d\n",stNode.fileNum,num);
Node tmp{num,stNode.fileNum};
insert_heap(nums,tmp);
}
}
return 0;
}
编译
g++ meger_file.cpp -std=c++11 -o meger_file
这里合并三个文件
1.txt
1
4
6
7
231
412
1231
1244
2.txt
4
5
7
8
9
12
32
42
446
654
3.txt
0
3
41
55
656
858
968
6575
合并文件
./meger_file 1.txt 2.txt 3.txt
after_merge_file.txt
0
1
3
4
4
5
6
7
7
8
9
12
32
41
42
55
231
412
446
654
656
858
968
1231
1244
6575