文章目录
文件分割与合并
将一个大文件(这里测试文件为5.2G)切分为指定大小的文件,然后再把分割后的文件拼接合并为分割前的源文件
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <limits>
#include <string>
#include <system_error>
#include <vector>

#include <boost/timer.hpp> // timing utility (boost::timer)
// Suffix of the directory that receives the chunks: <source stem>_chunk
#define FILE_SUFFIX "_chunk"
// printf-style pattern of each chunk file name: <source stem>_chunk_<index>.bin
#define CHUNK_NAME "%s_chunk_%d.bin"
/// <summary>
/// Splits a file into consecutive chunks of at most chunkSize bytes each.
/// The chunks are written to a directory named "<source stem>_chunk" that is
/// created next to the source file, as "<source stem>_chunk_<index>.bin"
/// with indices starting at 0. An empty source file produces no chunks.
/// </summary>
/// <param name="inputFile">Path of the file to split.</param>
/// <param name="chunkSize">Maximum size of each chunk in bytes, e.g. 500 * 1024 * 1024 for 500 MB chunks. Must be greater than zero.</param>
/// <param name="inputFiles">Receives, appended in order, the path of every chunk that was written completely.</param>
/// <returns>true if every chunk was written; false on an invalid chunk size or any open/read/write error.</returns>
bool splitFile(const std::string& inputFile, size_t chunkSize, std::vector<std::string>& inputFiles)
{
    // A zero chunk size would divide by zero when the chunk count is computed.
    if (chunkSize == 0)
    {
        std::cout << "Error: chunk size must be greater than zero" << std::endl;
        return false;
    }

    // Open the source first so that a missing file does not leave an empty
    // chunk directory behind.
    std::ifstream inputFileStream(inputFile, std::ios::binary | std::ios::ate);
    if (!inputFileStream.is_open())
    {
        std::cout << "Error opening file: " << inputFile << std::endl;
        return false;
    }

    // The stream was opened at the end, so the position is the file size.
    // tellg() reports -1 on failure, which must not be cast to an unsigned size.
    const std::streamoff endOffset = static_cast<std::streamoff>(inputFileStream.tellg());
    if (endOffset < 0)
    {
        std::cout << "Error reading size of file: " << inputFile << std::endl;
        return false;
    }
    const std::uintmax_t fileSize = static_cast<std::uintmax_t>(endOffset);
    inputFileStream.seekg(0, std::ios::beg);

    const std::string inputFileName = std::filesystem::path(inputFile).stem().string();
    const std::filesystem::path chunkDir =
        std::filesystem::path(inputFile).parent_path() / (inputFileName + FILE_SUFFIX);

    // The error_code overload reports failure through the return value instead
    // of throwing; an already existing directory is not an error.
    std::error_code dirError;
    std::filesystem::create_directories(chunkDir, dirError);
    if (dirError)
    {
        std::cout << "Error creating chunk directory: " << chunkDir.string() << std::endl;
        return false;
    }

    // Round up without computing fileSize + chunkSize - 1, which could wrap.
    const std::uintmax_t numberOfChunks = fileSize / chunkSize + (fileSize % chunkSize != 0 ? 1 : 0);
    // The chunk index is formatted with %d, so it has to fit in an int.
    if (numberOfChunks > static_cast<std::uintmax_t>(std::numeric_limits<int>::max()))
    {
        std::cout << "Error: chunk size too small, too many chunks for file: " << inputFile << std::endl;
        return false;
    }

    // Copy through a bounded buffer so memory use does not grow with chunkSize.
    constexpr size_t kMaxBufferSize = 4 * 1024 * 1024;
    std::vector<char> buffer(std::min(chunkSize, kMaxBufferSize));

    std::uintmax_t bytesLeftInFile = fileSize;
    for (std::uintmax_t i = 0; i < numberOfChunks; ++i)
    {
        // Every chunk is full-sized except possibly the last one.
        const std::uintmax_t chunkSizeActual = std::min<std::uintmax_t>(chunkSize, bytesLeftInFile);

        // Measure the formatted name first, then format into a string of
        // exactly that size; a fixed-size buffer could overflow on long stems.
        const int chunkIndex = static_cast<int>(i);
        const int nameLength = std::snprintf(nullptr, 0, CHUNK_NAME, inputFileName.c_str(), chunkIndex);
        if (nameLength < 0)
        {
            std::cout << "Error building chunk file name for: " << inputFile << std::endl;
            return false;
        }
        std::string chunkLeaf(static_cast<size_t>(nameLength) + 1, '\0');
        std::snprintf(&chunkLeaf[0], chunkLeaf.size(), CHUNK_NAME, inputFileName.c_str(), chunkIndex);
        chunkLeaf.resize(static_cast<size_t>(nameLength));
        const std::string chunkFileName = (chunkDir / chunkLeaf).string();

        std::ofstream chunkFile(chunkFileName, std::ios::binary);
        if (!chunkFile.is_open())
        {
            std::cout << "Error creating chunk file: " << chunkFileName << std::endl;
            return false;
        }

        // The source is read sequentially, so no seek is needed between chunks.
        std::uintmax_t remaining = chunkSizeActual;
        while (remaining > 0)
        {
            const std::streamsize want =
                static_cast<std::streamsize>(std::min<std::uintmax_t>(buffer.size(), remaining));
            inputFileStream.read(buffer.data(), want);
            if (inputFileStream.gcount() != want)
            {
                std::cout << "Error reading file: " << inputFile << std::endl;
                return false;
            }
            chunkFile.write(buffer.data(), want);
            if (!chunkFile)
            {
                std::cout << "Error writing chunk file: " << chunkFileName << std::endl;
                return false;
            }
            remaining -= static_cast<std::uintmax_t>(want);
        }

        // close() flushes; a failure here (e.g. disk full) sets failbit.
        chunkFile.close();
        if (!chunkFile)
        {
            std::cout << "Error writing chunk file: " << chunkFileName << std::endl;
            return false;
        }

        bytesLeftInFile -= chunkSizeActual;
        inputFiles.push_back(chunkFileName);
        std::cout << "save split: " << chunkFileName << std::endl;
    }
    return true;
}
/// <summary>
/// Concatenates several files, in the given order, into one output file.
/// The output file is created, or truncated if it already exists.
/// Empty input files contribute nothing and do not stop the merge.
/// </summary>
/// <param name="outputFileName">Path of the merged file to write.</param>
/// <param name="inputFiles">Paths of the files to concatenate, in order.</param>
/// <returns>true if every input was appended; false if a file could not be opened or the output could not be written.</returns>
bool mergeFiles(const std::string& outputFileName, const std::vector<std::string>& inputFiles)
{
    std::ofstream outputFile(outputFileName, std::ios::binary);
    if (!outputFile.is_open())
    {
        std::cout << "Error creating output file: " << outputFileName << std::endl;
        return false;
    }

    for (const auto& inputFile : inputFiles)
    {
        std::ifstream inputFileStream(inputFile, std::ios::binary);
        if (!inputFileStream.is_open())
        {
            std::cout << "Error opening input file: " << inputFile << std::endl;
            return false;
        }

        // operator<<(streambuf*) sets failbit on the destination when it
        // inserts no characters, which would make every later write fail
        // silently; so only stream inputs that actually contain data.
        if (inputFileStream.peek() != std::ifstream::traits_type::eof())
        {
            outputFile << inputFileStream.rdbuf();
        }
        if (!outputFile)
        {
            std::cout << "Error writing output file: " << outputFileName << std::endl;
            return false;
        }
        std::cout << "merge: " << inputFile << std::endl;
    }

    // close() flushes; a failure here (e.g. disk full) sets failbit.
    outputFile.close();
    if (!outputFile)
    {
        std::cout << "Error writing output file: " << outputFileName << std::endl;
        return false;
    }
    return true;
}
/// <summary>
/// Demo driver: splits ./split_file.rar into 500 MB chunks, then merges the
/// chunks back into ./merged_file.rar, printing the result and the elapsed
/// wall-clock time in seconds of each step.
/// </summary>
/// <returns>0 if both steps succeeded, 1 otherwise.</returns>
int main() {
    const std::string inputFile = "./split_file.rar"; // the original test used a 5.2 GB file
    const std::string mergedFileName = "./merged_file.rar";
    const size_t chunkSize = size_t{500} * 1024 * 1024; // 500 MB

    // steady_clock measures wall-clock time; a std::clock()-based timer counts
    // CPU time on POSIX and would under-report this I/O-bound work.
    using Clock = std::chrono::steady_clock;
    const auto secondsSince = [](Clock::time_point start) {
        return std::chrono::duration<double>(Clock::now() - start).count();
    };

    std::vector<std::string> inputFiles;

    const auto splitStart = Clock::now();
    const bool sp = splitFile(inputFile, chunkSize, inputFiles);
    if (sp)
    {
        std::cout << "splitFile successed" << std::endl;
    }
    else
    {
        std::cout << "splitFile failed" << std::endl;
    }
    std::cout << secondsSince(splitStart) << std::endl; // seconds
    if (!sp)
    {
        // Merging an incomplete chunk list would only produce a corrupt file.
        return 1;
    }

    const auto mergeStart = Clock::now();
    const bool me = mergeFiles(mergedFileName, inputFiles);
    if (me)
    {
        std::cout << "mergeFiles successed" << std::endl;
    }
    else
    {
        std::cout << "mergeFiles failed" << std::endl;
    }
    std::cout << secondsSince(mergeStart) << std::endl; // seconds
    return me ? 0 : 1;
}