c++ 文件拆分与合并——结合linux进程管理wait/waitpid

ze言

已于 2022-03-18 12:44:09 修改

阅读量2.6k

点赞数 6

分类专栏：服务器 Linux 文章标签： linux 服务器 c++ vs

于 2022-03-18 09:55:25 首次发布

本文链接：https://blog.csdn.net/hml111666/article/details/123563797

版权

Linux 同时被 2 个专栏收录

34 篇文章 13 订阅

订阅专栏

服务器

29 篇文章 5 订阅

订阅专栏

一、基本思路

1、拆分：读一个文件写多个文件

2、合并：读多个文件写入到一个文件

3、父子进程：必须等待子进程拆分之后才合并（if 、else if结构，如下）

    // Skeleton of the fork/wait pattern used in this article: the child branch
    // does the work and exits, the parent branch waits for the child.
    pid = fork();
	if (pid == 0)
	{
        // Child branch: business logic (function calls etc.) goes here.
		exit(0);
	}
	else if (pid > 0)
	{
		// Parent branch: wait for the child (waitpid can be used instead).
		// NOTE(review): shorthand only - the full listings below call
		// wait(&status) / waitpid(pid, &status, ...) with a status pointer.
		int res = wait();// or waitpid
	}

如果在父进程执行到wait前，子进程就已经结束了，父进程的wait仍然能接收到子进程的退出状态：子进程结束后会以僵尸进程的形式保留退出状态，直到父进程调用wait/waitpid回收。fork之后父进程和子进程各自拥有一份代码并独立运行，子进程走if分支，父进程走else if分支并执行wait，所以if 、else if相当于并行。
主进程先做fork才产生孩子，孩子的执行效率会慢一点点（毫秒之差）
简单的说进程就是运行的程序，就像我们在电脑上我连续开了QQ、爱奇艺、网易云音乐，我现在只听音乐，但是QQ、爱奇艺仍然在运行，所以进程是可以同时运行的
exit不是传递数据的，只是反馈结果（类似return ，但仅仅是结果），在做业务逻辑的时候可以有 -1、0、1三个参数（或者两个都可以）分别表示逻辑出错、结束或失败、成功。所以不能理解为把拆分的文件个数传递给父进程：父进程通过WEXITSTATUS只能取到退出码的低8位（0~255），参数写1000，父进程拿到的是232而不是1000（同理，-1会变成255）。

二、实现及效果

1、常见问题分析

在循环读取文件的时候，不是立刻open，要先获取每一个文件名拼接上路径（如/root/projects）opendir 、readdir 中的d_name是文件名。
拼接上了路径也不能立刻循环读，循环写，因为拆分后文件命名有规则，而readdir返回文件的顺序由文件系统决定，读的时候不一定按照1.avi,2.avi,3.avi这样的顺序。即使按文件名做字符串排序也不行：文件名中是字母的话按照英文字母表排序，如果是数字的话就不正常了，会按照1,10,11，...19,2,20,21...这样排列（字符串排序：一个个字符比较，比如第一个字符是1的排在一起，如下图所示）。
要想解决上面这个问题，这时候有人可能会想到用拆分时用来命名的number变量，通过这个number++实现顺序读取，但是你不知道拆分后的文件夹中文件的总数。也有的可能会想到子进程拆分文件中的个数通过exit传递参数，但前面提到，显然不行。
那么就要用到排序函数sort，给它传入自定义的比较函数（下面的代码用的是lambda表达式），把文件名中的数字转换成整数后再比较即可（string 通过c_str()转成char*，再用atoi转成int，两个整型就可以进行比较）

但是传递进来的路径并不是单纯的数字，所以就要涉及到字符串的切割substr(pos, len)（从下标pos开始截取len个字符，第二个参数是长度而不是结束位置），例如把/root/projects/breakFIle/1.avi中的数字1抠出来，这也就是为什么选择string类型的list容器。

2、代码

代码版本一：未把路径存入链表进行排序等

/**
 * Split srcFile into consecutive chunk files named 1.avi, 2.avi, ... inside
 * the directory dstFile.
 *
 * Each chunk holds the bytes returned by one read() of at most 102400 bytes,
 * so concatenating the chunks in numeric order reproduces the source file.
 * Errors are reported with perror() and stop the split; nothing is returned.
 *
 * @param srcFile path of the file to split
 * @param dstFile existing directory that receives the chunk files
 */
void breakFile(string srcFile, string dstFile)
{
	char buf[102400];            // holds one chunk of raw (binary) data
	char numText[32] = { 0 };    // decimal text of the current chunk number
	int tpNum = 1;               // chunk numbering starts at 1

	umask(0);
	int readfd = open(srcFile.c_str(), O_RDONLY);
	if (readfd < 0)
	{
		perror("open file error");
		return;
	}

	ssize_t res1 = 0;
	// Keep reading until end of file (0) or an error (-1).
	while ((res1 = read(readfd, buf, sizeof(buf))) > 0)
	{
		snprintf(numText, sizeof(numText), "%d", tpNum);
		string dstName = dstFile + "/" + string(numText) + ".avi";
		cout << "文件名 " << dstName << endl;

		// O_TRUNC: a longer chunk left over from an earlier run must not keep
		// its old tail behind the freshly written data.
		int writefd = open(dstName.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0777);
		if (writefd < 0)
		{
			perror("open file error");
			break;
		}
		cout << "res1 = " << res1 << endl;

		// write() may accept fewer bytes than requested; loop until the whole
		// chunk is on disk.
		ssize_t done = 0;
		while (done < res1)
		{
			ssize_t res2 = write(writefd, buf + done, res1 - done);
			if (res2 <= 0)
			{
				perror("write file error");
				break;
			}
			done += res2;
		}
		close(writefd);   // closed on success and on failure alike

		if (done < res1)
		{
			// Stop instead of overwriting the same chunk name with later data.
			break;
		}
		tpNum++;
	}
	if (res1 < 0)
	{
		perror("read file error");
	}
	close(readfd);
}

/**
 * Split srcPath into chunk files inside dstPath, then merge the chunks back
 * into dstPath/merge.avi.
 *
 * The split runs in a forked child (via breakFile); the parent waits for the
 * child with waitpid and only merges when the child exited with status 1.
 * Chunks are merged in numeric order 1.avi, 2.avi, ... (the naming scheme
 * breakFile uses) until as many bytes as the source file holds have been
 * written, so the result does not depend on directory listing order or on
 * how large each chunk is.
 *
 * @param srcPath path of the regular file to split
 * @param dstPath existing directory for the chunk files and merge.avi
 */
void split_merge_file(string srcPath, string dstPath)
{
	struct stat tpstat;   // metadata of the source path
	if (stat(srcPath.c_str(), &tpstat) != 0)
	{
		perror("stat file error");
		return;
	}
	if (S_ISDIR(tpstat.st_mode))
	{
		// Not a system-call failure, so errno is meaningless here.
		cerr << "open file error（is dir）" << endl;
		return;
	}

	// Total size of the source file; the merge stops once this many bytes
	// have been copied.
	long file_size = tpstat.st_size;
	cout << "file size is: " << file_size << " byte" << endl;

	pid_t pid = fork();
	if (pid < 0)
	{
		perror("fork error");
		return;
	}
	if (pid == 0)
	{
		// Child: split the file, then leave without returning to the caller.
		breakFile(srcPath, dstPath);
		cout << "子进程 pid =" << getpid() << "拆分成功" << endl;
		_exit(1);
	}

	// Parent: block until the child has finished splitting.
	int status = 0;
	if (waitpid(pid, &status, 0) < 0)
	{
		perror("waitpid error");
		return;
	}
	if (!WIFEXITED(status) || WEXITSTATUS(status) != 1)
	{
		cout << "子进程拆分文件失败 " << endl;
		return;
	}

	// Open the merge target once, outside the chunk loop; O_TRUNC discards
	// the result of any earlier run.
	string mergeFileName = dstPath + "/merge.avi";
	umask(0);
	int writefd = open(mergeFileName.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0777);
	if (writefd < 0)
	{
		perror("open file error");
		return;
	}

	char buf[102400];
	char numText[32] = { 0 };
	long merged = 0;     // bytes copied into merge.avi so far
	int index = 1;       // number of the chunk being merged
	bool ok = true;

	while (ok && merged < file_size)
	{
		snprintf(numText, sizeof(numText), "%d", index);
		string splitFileName = dstPath + "/" + string(numText) + ".avi";
		cout << "splitFileName =" << splitFileName << endl;

		int readfd = open(splitFileName.c_str(), O_RDONLY);
		if (readfd < 0)
		{
			// A missing chunk means the split was incomplete.
			perror("open file error");
			ok = false;
			break;
		}

		ssize_t res1 = 0;
		while (ok && (res1 = read(readfd, buf, sizeof(buf))) > 0)
		{
			// write() may be partial; loop until the block is fully written.
			ssize_t done = 0;
			while (done < res1)
			{
				ssize_t res2 = write(writefd, buf + done, res1 - done);
				if (res2 <= 0)
				{
					perror("write file error");
					ok = false;
					break;
				}
				done += res2;
			}
			if (ok)
			{
				merged += res1;
			}
		}
		if (res1 < 0)
		{
			perror("read file error");
			ok = false;
		}
		close(readfd);
		index++;
	}
	close(writefd);

	if (ok)
	{
		cout << "the number of splited file is: " << index - 1 << endl;
		cout << "合并完成 " << endl;
	}
}


// Entry point for version 1: split one video and merge it back.
int main()
{
	// Source video, and the directory that receives the chunks and the merged file.
	const string sourceVideo = "/root/projects/Warcraft3_End.avi";
	const string workDir = "/root/projects/breakFIle";

	split_merge_file(sourceVideo, workDir);
	return 0;
}

代码版本二：main中使用进程，把路径存入链表进行排序，省略拆分

// Entry point for version 2: a forked child splits the video, the parent
// waits for it and merges the chunks only when the child reports success.
// Exit-status protocol between child and parent: 1 = split succeeded,
// 0 = split failed. Returns 0 when the merge was started, 1 otherwise.
int main()
{
	// Kept in writable arrays because mergeFile takes a non-const char array;
	// passing a string literal there is not valid C++11.
	char srcFile[] = "/root/projects/Warcraft3_End.avi";
	char chunkDir[] = "/root/projects/breakFIle";

	pid_t pid = fork();
	if (pid < 0)
	{
		perror("fork error");
		return 1;
	}

	if (pid == 0)
	{
		// Child: split, then report the outcome through the exit status.
		int res = breakFile(srcFile, chunkDir);
		exit(res > 0 ? 1 : 0);
	}

	cout << "父进程开始...等待子进程拆分文件结束" << endl;
	int status = 0;
	if (wait(&status) < 0)
	{
		perror("wait error");
		return 1;
	}

	// A child killed by a signal has no exit status; treat it as a failure.
	if (!WIFEXITED(status))
	{
		cout << "子进程拆分文件失败" << endl;
		return 1;
	}

	cout << "status=" << WEXITSTATUS(status) << endl;
	if (WEXITSTATUS(status) != 1)
	{
		cout << "子进程拆分文件失败" << endl;
		return 1;
	}

	cout << "子进程拆分文件成功，可以开始合并文件" << endl;
	mergeFile(chunkDir);
	return 0;
}
/**
 * Merge the chunk files in directory filePath into filePath/mergeAll.avi.
 *
 * Every regular file in the directory (except mergeAll.avi itself) is
 * collected, the paths are sorted by the integer at the start of the file
 * name (1.avi, 2.avi, ..., 10.avi), and their contents are appended to the
 * output in that order. The output is truncated first, so a rerun does not
 * mix in the previous result.
 *
 * If the directory cannot be opened the process exits with status 0, as the
 * original listing did. Other I/O errors are reported with perror() and stop
 * the merge.
 *
 * @param filePath directory that holds the chunk files
 */
void mergeFile(char filePath[])
{
	const string dirPath = filePath;
	const string outName = "mergeAll.avi";
	// Built with std::string so a long directory name cannot overflow a
	// fixed-size buffer.
	const string url = dirPath + "/" + outName;
	cout << "url " << url << endl;

	DIR* dir = opendir(filePath);
	if (dir == NULL)
	{
		perror("open dir error");
		exit(0);
	}

	// Collect the full paths first; sorting and copying happen after the
	// directory scan has finished.
	list<string> pathList;
	struct dirent* dir_stru;
	while ((dir_stru = readdir(dir)) != NULL)
	{
		// "." and ".." are the directory itself and its parent.
		if (strcmp(dir_stru->d_name, ".") == 0 || strcmp(dir_stru->d_name, "..") == 0)
		{
			continue;
		}
		// DT_REG (8) is a regular file, DT_DIR (4) a directory, DT_LNK (10) a
		// symbolic link.
		// NOTE(review): readdir(3) says some filesystems only report
		// DT_UNKNOWN; such entries are skipped here - confirm the target
		// filesystem fills in d_type.
		if (dir_stru->d_type != DT_REG)
		{
			continue;
		}
		// Never feed the output of an earlier run back into the merge.
		if (outName == dir_stru->d_name)
		{
			continue;
		}
		cout << "dir_stru->d_name = " << dir_stru->d_name << endl;
		string fullPath = dirPath + "/" + dir_stru->d_name;
		pathList.push_back(fullPath);
		cout << "完整路径 = " << fullPath << endl;
	}
	closedir(dir);

	// Order by the number at the start of the file name. The name is whatever
	// follows the last '/', so this works for any directory; atoi stops at
	// the '.' of the extension.
	pathList.sort([](const string& a, const string& b) {
		return atoi(a.substr(a.rfind('/') + 1).c_str()) < atoi(b.substr(b.rfind('/') + 1).c_str());
	});

	umask(0);
	int writefd = open(url.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0777);
	if (writefd < 0)
	{
		perror("open file error");
		return;
	}

	char buf[102400];   // I/O buffer
	bool ok = true;
	for (const string& path : pathList)
	{
		cout << "排序后路径" << path << endl;
		int readfd = open(path.c_str(), O_RDONLY);
		if (readfd < 0)
		{
			perror("open file error");
			ok = false;
			break;
		}

		// Read many files, write one. "> 0" matters: read() returns -1 on
		// error, and looping on "!= 0" would never terminate.
		ssize_t res = 0;
		while (ok && (res = read(readfd, buf, sizeof(buf))) > 0)
		{
			ssize_t done = 0;
			while (done < res)
			{
				ssize_t put = write(writefd, buf + done, res - done);
				if (put <= 0)
				{
					perror("write file error");
					ok = false;
					break;
				}
				done += put;
			}
		}
		if (res < 0)
		{
			perror("read file error");
			ok = false;
		}
		close(readfd);
		if (!ok)
		{
			break;
		}
	}
	close(writefd);

	if (ok)
	{
		cout << "合并成功" << endl;
	}
}

优化：如果传进来的路径带有.符号（非法路径）要判断，在if ((dir = opendir(filePath)) == NULL)之前。

3、效果

遇到的问题：在拆分后循环读取文件夹的时候，只读取到文件7.avi，其他都没有读到，导致合并后的文件大小为7.avi的大小

解决：包括路径列表的排序在内的后面的操作要放在while 循环外面【while ((dir_stru = readdir(dir)) !=NULL)外面】，即先循环把路径添加到链表中再进行读写

总结：

1、合并文件步骤

根据路径打开文件夹（opendir），打开失败则退出；然后循环读取（readdir）其中的条目，过滤掉.和..，只保留普通文件（d_type==DT_REG，值为8），再拼接成完整路径存入链表。
用find函数查找路径中特定字符的下标（.字符），substr函数切割路径（切割出/和.之间的数字字符），sort函数对整型数字排序，达成对路径列表的排序。
遍历链表中的路径，循环读取内容写入目标文件，全部写完关闭目标文件

原创不易，转载请注明出处：

c++ 文件拆分与合并——结合linux进程管理wait/waitpid

ze言

关注

6
点赞
踩
9

收藏

觉得还不错? 一键收藏
打赏
4
评论
c++ 文件拆分与合并——结合linux进程管理wait/waitpid

一、主线思路1、拆分：读一个文件写多个文件2、合并：读多个文件写入到一个文件3、父子进程：必须等待子进程拆分之后才合并（if 、else if结构，如下）
复制链接

扫一扫