C语言 多线程交替|正则|IO综合运用

本文描述了一个C程序,通过多线程和正则表达式从`source.bin`文件中提取数字,存入`target.bin`。作者探讨了如何处理数据共享、线程交替执行以及内存管理,如mutex锁定和内存分配的注意事项。
摘要由CSDN通过智能技术生成

前阵子正好刚学到C多线程和正则表达式.所以想着写下代码实践下.

主要实现功能是将source.bin文件中的数字部分全部导入到target.bin文件中.

主要涉及到多线程交替处理.[这里涉及到mutex锁定共享数据.线程交替实现难点.]

gather线程负责一行一行采集,并且把每行的数字进行提取

process线程相对简单,只是单纯的写入文件.

MyTest.c

//通过多线程[交替]/正则实现获取源文件中每行的数字部分并存储到目标文件中.
/*
编写以下代码的过程中主要遇到以下一些问题,思考解决办法花了挺多时间.
1.线程需要交替进行,并且存在数据共享情况.
2.之前一直没留意到内存分配的细节:calloc及realloc的size为0时,结果由实现定义(可能返回NULL,也可能返回不可解引用的指针),在隐式计算长度时很容易忽略这一点.[如遇到只有换行符的空行]
*/
#define __STDC_WANT_LIB_EXT1__ 1
#include "memctr.h" //引入内存释放接口文件
//#include <stdio.h> //File IO
#include <threads.h> //Thread
#include <regex.h> //Regex
//#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <time.h>


// Per-thread context handed to gather() and process() through thrd_create().
typedef struct _DATA{
	// Stream the owning thread works on: gather reads from it, process writes to it.
	FILE* pfile;
	// Path used to open pfile; also printed in the open-failure messages.
	char* filename;
	// Shared slot used to exchange one line between the threads; main gives
	// both contexts the same address so they see the same pointer.
	char** data; 
} _DATA;


//Forward declarations for the functions defined below main.

//gather: thread entry point; reads the source file line by line and keeps
//only the digit runs of each line in the shared slot.
int gather(void* pdata);

//process: thread entry point; writes what gather left in the shared slot
//to the target file.
int process(void* pdata);

//hasNextLine: reads the next line of pfile into *pstr and reports whether
//a line was obtained.
bool hasNextLine(FILE* pfile,char** pstr);

//reg_next: returns the next regex match in str after the position reached
//by the previous call on the same line.
char* reg_next(regex_t* regex,char* str,size_t len);


// Guards the shared line slot and the two status flags below.
mtx_t  mutex;

thrd_t gather_id;
thrd_t process_id;

// Set by gather when no further line can be read; process checks it to stop.
bool _gather_finish = false;
// Set by gather after each handed-over line; reg_next clears it and restarts
// its scan position at the beginning of the next line.
bool _gather_newline = false;

// Turn indicator used to alternate the two threads. Both threads poll it in
// their wait loops WITHOUT holding the mutex while the other thread writes
// it, so it must be atomic to avoid a data race.
_Atomic bool current_thread = 1;
// The two values current_thread can take; they are never modified.
const bool _gather = 0;
const bool _process = 1;

// Sleep interval (1000 ns) used by the wait loops between polls.
struct timespec spec = {.tv_nsec = 1000};

/*
 * Program entry point.
 *
 * Opens source.bin for reading and target.bin for writing, starts the gather
 * and process threads on a shared one-line slot, waits for both to finish and
 * releases the resources.
 *
 * Returns EXIT_SUCCESS on success. Every failure path ends the program with
 * EXIT_FAILURE: thrd_exit() is deliberately not used for that, because when
 * the last thread leaves through thrd_exit() the program terminates as if
 * exit(EXIT_SUCCESS) had been called, hiding the error from the caller.
 */
int main(void){
	int status = EXIT_SUCCESS;

	// One pointer-sized slot; both thread contexts refer to the same slot.
	char** shared = calloc(1,sizeof *shared);
	if(!shared){
		fprintf(stderr,"%s > Failed to allocate the shared line slot.\n",__func__);
		return EXIT_FAILURE;
	}

	_DATA source = {.pfile = NULL,.filename = "source.bin",.data = shared};
	// Sharing is achieved by giving target.data the same address as source.data.
	_DATA target = {.pfile = NULL,.filename = "target.bin",.data = source.data};

	// Register the slot so that free_mem() releases it at the end.
	_incr((void**)&(source.data));

	if(mtx_init(&mutex,mtx_plain) != thrd_success){
		fprintf(stderr,"Failed to initialize gather_lock.\n");
		exit(EXIT_FAILURE);
	}

	errno_t state = fopen_s(&source.pfile,source.filename,"rb");
	if(state){
		fprintf(stderr,"Failed to open file:%s.\n>Reason: %s\n",source.filename,strerror(state));
		exit(EXIT_FAILURE);
	}

	state = fopen_s(&target.pfile,target.filename,"wb");
	if(state){
		fprintf(stderr,"Failed to open file:%s.\n>Reason: %s\n",target.filename,strerror(state));
		exit(EXIT_FAILURE);
	}

	// Any result other than thrd_success means the thread does not exist.
	switch(thrd_create(&gather_id,gather,&source)){
		case thrd_success:
			printf("%s > Thread gather start.\n",__func__);
			break;
		case thrd_nomem:
			fprintf(stderr,"%s > Failed to allocate memory to thread gather.\n",__func__);
			exit(EXIT_FAILURE);
		default:
			fprintf(stderr,"%s > Failed to create thread gather.\n",__func__);
			exit(EXIT_FAILURE);
	}

	switch(thrd_create(&process_id,process,&target)){
		case thrd_success:
			printf("%s > Thread process start.\n",__func__);
			break;
		case thrd_nomem:
			fprintf(stderr,"%s > Failed to allocate memory to thread process.\n",__func__);
			exit(EXIT_FAILURE);
		default:
			fprintf(stderr,"%s > Failed to create thread process.\n",__func__);
			exit(EXIT_FAILURE);
	}

	// Join BOTH threads: an unjoined thread leaks its resources, and the
	// files and contexts below must outlive every thread that uses them.
	if(thrd_join(gather_id,NULL) != thrd_success){
		fprintf(stderr,"%s > Failed to join thread gather.\n",__func__);
		status = EXIT_FAILURE;
	}
	if(thrd_join(process_id,NULL) != thrd_success){
		fprintf(stderr,"%s > Failed to join thread process.\n",__func__);
		status = EXIT_FAILURE;
	}

	fclose(source.pfile);
	// fclose flushes buffered output, so a failure here means lost data.
	if(fclose(target.pfile) == EOF){
		fprintf(stderr,"%s > Failed to flush and close file:%s.\n",__func__,target.filename);
		status = EXIT_FAILURE;
	}

	// NOTE(review): the mutex is left to process teardown instead of being
	// destroyed here; destroying it is only safe once no thread can still
	// hold it - confirm against the thread functions before adding mtx_destroy.

	free_mem(); // release everything registered through _incr

	return status;
}

/*
 * Thread entry point of the gather thread.
 *
 * pdata points to the _DATA context of the source file. For every line that
 * hasNextLine() delivers, the line in the shared slot is replaced by the
 * concatenation of its digit runs, then the turn is handed to the process
 * thread and this thread waits until it gets the turn back.
 *
 * When no further line is available (or the digit buffer cannot be
 * allocated) _gather_finish is set, the turn is handed over one last time
 * and the thread ends.
 *
 * Returns 0 on normal completion, 1 when it stopped because of an
 * allocation failure. A regex that fails to compile terminates the program.
 */
int gather(void* pdata){
	_DATA* psource = (_DATA*)pdata;
	FILE* pfile = psource->pfile;
	char** data = psource->data; // shared slot
	int status = 0;

	regex_t regex;
	const char* pattern = "[[:digit:]]+";
	int state = regcomp(&regex,pattern,REG_EXTENDED);
	if(state){
		// Stack buffer: the error path must not depend on another allocation.
		char errbuf[100];
		regerror(state,&regex,errbuf,sizeof errbuf);
		fprintf(stderr,"%s> Failed to compile regex:%s\nReason:%s\n",__func__,pattern,errbuf);
		exit(EXIT_FAILURE);
	}

	while(true){
		mtx_lock(&mutex);
		bool done = !hasNextLine(pfile,data);

		// Empty lines are handed over unchanged; there is nothing to extract.
		if(!done && (*data)[0] != '\0'){
			size_t line_len = strlen(*data);
			// The digits can never be longer than the line itself; the extra
			// byte keeps the result NUL-terminated even for an all-digit line.
			char* digits = calloc(line_len + 1,sizeof(char));
			if(!digits){
				fprintf(stderr,"%s> Failed to allocate memory for the digit buffer.\n",__func__);
				status = 1;
				done = true;
			}else{
				size_t used = 0;
				char* piece = NULL;
				while((piece = reg_next(&regex,*data,line_len)) != NULL){
					size_t piece_len = strlen(piece);
					memcpy(digits + used,piece,piece_len);
					used += piece_len;
					free(piece);
				}
				free(*data);
				*data = digits;
			}
		}

		if(done){
			_gather_finish = true;
			current_thread = _process;
			mtx_unlock(&mutex);
			break;
		}

		current_thread = _process;
		_gather_newline = true; // the next reg_next call starts on a new line
		mtx_unlock(&mutex);

		// Wait until process hands the turn back.
		while(current_thread == _process){
			thrd_sleep(&spec,NULL);
		}
	}

	// Released exactly once, after the last use.
	regfree(&regex);
	return status;
}

int process(void* pdata){
	_DATA* ptarget = (_DATA*)pdata;
	while(true){
		mtx_lock(&mutex);
		if(_gather_finish){
			free(*(ptarget->data));
			break;
		}//写入文件也可以采用fwrite方式
		if(*(ptarget->data)){
			fprintf(ptarget->pfile,*(ptarget->data));
		}
		current_thread = _gather;
		mtx_unlock(&mutex);
		while(current_thread == _gather){
			thrd_sleep(&spec,NULL);
		}
		
	}
	return 0;
}

/*
 * Returns the next match of regex in str, continuing after the match
 * returned by the previous call.
 *
 * regex: compiled expression; only the overall match is used.
 * str:   the current line.
 * len:   length of str.
 *
 * The scan position is kept in a function-local static and is reset to the
 * start of the line whenever _gather_newline is set (the flag is cleared
 * here). Because of that static state the function is not reentrant.
 *
 * Returns a newly allocated, NUL-terminated copy of the match that the
 * caller must free, or NULL when the line holds no further match, when
 * regexec reports an error, or when the copy cannot be allocated.
 */
char* reg_next(regex_t* regex,char* str,size_t len){
	static size_t current = 0lu; // offset just past the previous match
	if(_gather_newline){ // a new line was gathered: restart from its beginning
		current = 0lu;
		_gather_newline = false;
	}
	if(current >= len){
		return NULL;
	}

	char* c_str = str + current;
	regmatch_t match;
	// Anything other than 0 (REG_NOMATCH or an error code) leaves match
	// without usable offsets, so it must not be read.
	if(regexec(regex,c_str,1,&match,0) != 0){
		return NULL;
	}
	// An empty match would not advance the position and the caller would
	// loop forever; treat it as "nothing more on this line".
	if(match.rm_eo <= match.rm_so){
		return NULL;
	}

	size_t s_len = (size_t)(match.rm_eo - match.rm_so);
	char* reval = calloc(s_len + 1,sizeof(char));
	if(!reval){
		return NULL;
	}
	memcpy(reval,c_str + match.rm_so,s_len);
	current += (size_t)match.rm_eo;
	return reval;
}



/*
 * Reads the next line of pfile into a newly allocated string.
 *
 * pfile: stream to read from.
 * pstr:  in/out; whatever *pstr points to on entry is freed first. On
 *        success *pstr receives the line without its terminator ("\n" or
 *        "\r\n"); the caller owns it. On failure *pstr is NULL.
 *
 * Returns true when a line was stored in *pstr. The text in front of the
 * end of file counts as a line even when it is empty, so a file ending in a
 * newline yields one final empty line. Returns false once the end of file
 * was reached by an earlier call, on a read error, or when memory cannot be
 * allocated (in that case the file position is restored when possible).
 *
 * The end-of-file state is kept in a function-local static, so the function
 * serves a single stream per program run.
 */
bool hasNextLine(FILE* pfile,char** pstr){
	static bool reached_eof = false;

	free(*pstr);
	*pstr = NULL;

	if(reached_eof){
		return false;
	}

	// Remember where the line starts so that an allocation failure can roll back.
	fpos_t position;
	bool can_rollback = (fgetpos(pfile,&position) == 0);

	size_t capacity = 16;
	size_t length = 0;
	char* line = malloc(capacity);
	if(!line){
		fprintf(stderr,"Failed to allocate memory.[%d]\n",__LINE__);
		return false;
	}

	int ch;
	while((ch = getc(pfile)) != EOF && ch != '\n'){
		// Always keep one byte in reserve for the terminating NUL.
		if(length + 1 == capacity){
			size_t grown = capacity * 2;
			char* temp = realloc(line,grown);
			if(!temp){
				fprintf(stderr,"Failed to re-allocate memory.Roll back.[%d]\n",__LINE__);
				if(can_rollback){
					fsetpos(pfile,&position);
				}
				free(line);
				return false;
			}
			line = temp;
			capacity = grown;
		}
		line[length++] = (char)ch;
	}

	if(ch == EOF){
		if(ferror(pfile)){
			// A read error is not the end of the data; report failure
			// instead of handing over a truncated line.
			free(line);
			return false;
		}
		reached_eof = true;
	}else if(length > 0 && line[length - 1] == '\r'){
		// The line ended in "\r\n": drop the carriage return as well.
		length--;
	}
	line[length] = '\0';

	// Give back unused capacity; if shrinking fails the larger buffer is still valid.
	char* fitted = realloc(line,length + 1);
	*pstr = fitted ? fitted : line;
	return true;
}











memctr.h

/*
 * memctr.h - minimal registry of heap pointers to release at program end.
 *
 * Callers register the ADDRESS of a pointer variable with _incr(); free_mem()
 * later frees whatever each registered variable points to at that moment and
 * sets the variable to NULL.
 *
 * This header defines (not merely declares) its objects and functions, so it
 * is meant to be included from a single translation unit. The registry is
 * not synchronised; callers must not use it from several threads at once.
 */
#ifndef _MEMCTR
#define _MEMCTR /* the guard only takes effect once the macro is defined */

#include <stdlib.h>
#include <stdio.h>
#define T MEMCTR
/* Head of the singly linked registry list; NULL while the list is empty. */
struct T* MEMCTR_LIST;
#define _list MEMCTR_LIST

/* Number of entries currently in the registry. */
unsigned long _MEM_LIST_SIZE = 0ul;

/* One registry entry. */
typedef struct MEMCTR{
	void** ptr;      /* address of the pointer variable to free later */
	struct T* next;  /* next entry, NULL at the tail */
} MEMCTR;


/*
 * Appends ptr to the registry.
 *
 * ptr: address of a pointer variable whose target free_mem() shall release.
 *
 * Returns the number of registered entries after the call, or 0 when ptr is
 * NULL or the entry cannot be allocated (the registry is then unchanged).
 */
unsigned long _incr(void** ptr){

	if(ptr == NULL){
		return 0;
	}

	T* entry = calloc(1,sizeof *entry);
	if(entry == NULL){
		return 0;
	}
	entry->ptr = ptr;
	entry->next = NULL;

	if(_list == NULL){
		_list = entry;
		_MEM_LIST_SIZE = 1;
		return _MEM_LIST_SIZE;
	}

	/* Walk to the tail so that entries are released in registration order. */
	T* last = _list;
	while(last->next){
		last = last->next;
	}
	last->next = entry;

	return ++ _MEM_LIST_SIZE;

}

/*
 * Frees the target of every registered pointer variable, sets each variable
 * to NULL and empties the registry. Calling it on an empty registry is a
 * no-op.
 */
void free_mem(void){
	T* temp = _list;
	while(temp){
		_list = temp->next;
		free(*(temp->ptr));
		*(temp->ptr) = NULL;
		free(temp);
		temp = _list;
	}
	_MEM_LIST_SIZE = 0;
}

#undef _list
#undef T
#endif









评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值