前阵子正好刚学到C多线程和正则表达式.所以想着写下代码实践下.
主要实现功能是将source.bin文件中的数字部分全部导入到target.bin文件中.
主要涉及多线程的交替处理.[这里需要用mutex锁定共享数据,难点在于实现两个线程的交替执行.]
gather线程负责一行一行采集,并且把每行的数字进行提取
process线程相对简单,只是单纯的写入文件.
MyTest.c
//通过多线程[交替]/正则实现获取源文件中每行的数字部分并存储到目标文件中.
/*
编写以下代码的过程中主要遇到以下一些问题,思考解决办法花了挺多时间.
1.线程需要交替进行,并且存在数据共享情况.
2.之前一直没留意到内存分配的细节:calloc及realloc的size为0时,其行为由实现定义(可能返回NULL),隐式计算长度时很容易忽略这一点.[如遇到只有换行符的空行]
*/
#define __STDC_WANT_LIB_EXT1__ 1
#include "memctr.h" //引入内存释放接口文件
//#include <stdio.h> //File IO
#include <threads.h> //Thread
#include <regex.h> //Regex
//#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <time.h>
//Per-thread context: main() builds one instance for the gather thread and one for the process thread.
typedef struct _DATA{
FILE* pfile; //stream opened by main(); gather reads from it, process writes to it
char* filename; //path passed to fopen_s and echoed in the error messages
//data is the variable the threads use to exchange a line; every thread is given this same address.
char** data;
} _DATA;
//function prototypes
//gather: thread entry point; reads the source file and extracts the digits of each line
int gather(void*);
//process: thread entry point; writes the data obtained by the gather thread to the target file.
int process(void*);
//hasNextLine: returns whether the next line was read successfully and stores that line through the char** argument.
bool hasNextLine(FILE*,char**);
//reg_next: uses the regex to extract the next match located after the current position of the current line
char* reg_next(regex_t*,char*,size_t);
mtx_t mutex; //guards reads and writes of the data shared between the threads
thrd_t gather_id;
thrd_t process_id;
bool _gather_finish = false;//set by gather once the source has no more lines; checked by process
bool _gather_newline = false;//tells reg_next that a new line has started
//Turn marker used to alternate the gather and process threads.
//Both threads poll it in their wait loops WITHOUT holding the mutex, so it has
//to be atomic; a plain bool read concurrently with a write is a data race.
_Atomic bool current_thread = 1;
bool _gather = 0;//value of current_thread when it is gather's turn
bool _process = 1;//value of current_thread when it is process's turn
//Back-off interval (1000 ns) used by the polling loops.
struct timespec spec = {.tv_nsec = 1000};
int main(void){
_DATA source = {.pfile = NULL,.filename = "source.bin",.data = calloc(1,sizeof(char*))};
//以下通过将source.data赋值给target.data实现线程间的数据共享
_DATA target = {.pfile = NULL,.filename = "target.bin",.data = source.data};
//将需要最后释放的内存存入.
_incr((void**)&(source.data));
//init
switch(mtx_init(&mutex,mtx_plain)){
case thrd_success:
break;
case thrd_error:
fprintf(stderr,"Failed to initialize gather_lock.\n");
thrd_exit(EXIT_FAILURE);
}
errno_t state = fopen_s(&source.pfile,source.filename,"rb");
if(state){
fprintf(stderr,"Failed to open file:%s.\n>Reason: %s\n",source.filename,strerror(state));
thrd_exit(EXIT_FAILURE);
}
state = fopen_s(&target.pfile,target.filename,"wb");
if(state){
fprintf(stderr,"Failed to open file:%s.\n>Reason: %s\n",target.filename,strerror(state));
thrd_exit(EXIT_FAILURE);
}
//threads
switch(thrd_create(&gather_id,gather,&source)){
case thrd_success:
printf("%s > Thread gather start.\n",__func__);
break;
case thrd_nomem:
fprintf(stderr,"%s > Failed to allocate memory to thread gather.\n",__func__);
thrd_exit(EXIT_FAILURE);
case thrd_error:
fprintf(stderr,"%s > Failed to create thread gather.\n",__func__);
thrd_exit(EXIT_FAILURE);
}
switch(thrd_create(&process_id,process,&target)){
case thrd_success:
printf("%s > Thread process start.\n",__func__);
break;
case thrd_nomem:
fprintf(stderr,"%s > Failed to allocate memory to thread process.\n",__func__);
exit(EXIT_FAILURE);
case thrd_error:
fprintf(stderr,"%s > Failed to create thread process.\n",__func__);
exit(EXIT_FAILURE);
}
//正常情况下process线程在gather线程后完成.
thrd_join(process_id,NULL);
fclose(source.pfile);
fclose(target.pfile);
free_mem(); //最后这是释放内存
return 0;
}
/*
 * Thread entry point of the gather thread.
 *
 * pdata points to the _DATA describing the source file. For every line read
 * by hasNextLine() the shared slot (*data) is replaced with the concatenation
 * of all digit runs of that line, then the turn is handed to the process
 * thread and this thread polls until the turn comes back.
 *
 * Returns 0 once the source is exhausted. Terminates the program with
 * EXIT_FAILURE when the regex cannot be compiled or memory runs out.
 */
int gather(void* pdata){
    _DATA* psource = (_DATA*)pdata;
    FILE* pfile = psource->pfile;
    char** data = psource->data;//share-data

    regex_t regex;
    char* pattern = "[[:digit:]]+";
    int state = regcomp(&regex,pattern,REG_EXTENDED);
    if(state){
        char errbuf[100] = {0};
        regerror(state,&regex,errbuf,sizeof errbuf);
        fprintf(stderr,"%s> Failed to compile regex:%s\nReason:%s\n",__func__,pattern,errbuf);
        exit(EXIT_FAILURE);
    }
    //regex compile success!
    while(true){
        mtx_lock(&mutex);
        if(!hasNextLine(pfile,data)){
            //Nothing left to gather: publish the finish flag, hand over the turn
            //and release the regex exactly once.
            _gather_finish = true;
            current_thread = _process;
            regfree(&regex);
            mtx_unlock(&mutex);
            break;
        }
        //An empty line is passed on untouched; this also avoids a zero-sized allocation.
        size_t line_len = strlen(*data);
        if(line_len){
            //The digits can never be longer than the line itself; +1 for the terminator.
            char* re_data = calloc(line_len + 1,sizeof(char));
            if(!re_data){
                fprintf(stderr,"%s> Failed to allocate memory.[%d]\n",__func__,__LINE__);
                exit(EXIT_FAILURE);
            }
            size_t used = 0;//number of digits collected so far
            char* sub_data = NULL;//one match returned by reg_next
            while((sub_data = reg_next(&regex,*data,line_len)) != NULL){
                size_t sub_len = strlen(sub_data);
                memcpy(re_data + used,sub_data,sub_len);
                used += sub_len;
                free(sub_data);
            }
            //Give back the unused tail but keep the terminator; if shrinking
            //fails the larger buffer is still perfectly valid.
            char* fitted = realloc(re_data,used + 1);
            if(fitted){
                re_data = fitted;
            }
            free(*data);
            *data = re_data;
        }
        current_thread = _process;
        _gather_newline = true;//the next reg_next call starts a new line
        mtx_unlock(&mutex);
        //Wait until the process thread hands the turn back.
        while(current_thread == _process){
            thrd_sleep(&spec,NULL);
        }
    }
    return 0;
}
int process(void* pdata){
_DATA* ptarget = (_DATA*)pdata;
while(true){
mtx_lock(&mutex);
if(_gather_finish){
free(*(ptarget->data));
break;
}//写入文件也可以采用fwrite方式
if(*(ptarget->data)){
fprintf(ptarget->pfile,*(ptarget->data));
}
current_thread = _gather;
mtx_unlock(&mutex);
while(current_thread == _gather){
thrd_sleep(&spec,NULL);
}
}
return 0;
}
/*
 * Returns the next regex match of the current line as a newly allocated,
 * NUL-terminated string that the caller must free().
 *
 * regex : compiled expression
 * str   : start of the current line
 * len   : length of str
 *
 * The scan position is kept in a function-local static between calls and is
 * reset to the line start whenever _gather_newline is set (the flag is
 * cleared here). Returns NULL when the line is exhausted, when nothing more
 * matches, when regexec reports an error, or when the copy cannot be
 * allocated.
 */
char* reg_next(regex_t* regex,char* str,size_t len){
    static size_t current = 0lu;//offset just past the previous match of this line
    if(_gather_newline){//a new line has started: rewind the scan position
        current = 0lu;
        _gather_newline = false;
    }
    if(current >= len){
        return NULL;
    }
    char* c_str = str + current;
    //Only the whole match is needed, so a single regmatch_t is enough.
    regmatch_t match;
    if(regexec(regex,c_str,1,&match,0) != 0){
        //REG_NOMATCH or any other failure: match holds nothing usable.
        return NULL;
    }
    size_t s_len = (size_t)(match.rm_eo - match.rm_so);
    char* reval = calloc(s_len + 1,sizeof(char));
    if(!reval){
        return NULL;
    }
    memcpy(reval,c_str + match.rm_so,s_len);
    current += (size_t)match.rm_eo;
    return reval;
}
/*
 * Reads the next line of pfile into a freshly allocated string.
 *
 * pfile : stream to read from
 * pstr  : in/out slot; the string it currently holds (may be NULL) is freed
 *         first. On success it receives the new line without its "\n" or
 *         "\r\n" terminator and the caller owns it; on failure it is NULL.
 *
 * Returns true when a line was read (possibly an empty one) and false when
 * the stream is exhausted, a read error occurred or memory ran out. When
 * growing the buffer fails, the stream position is rolled back to the start
 * of the line. End of file is remembered in a function-local static, so the
 * function serves a single stream per program run.
 */
bool hasNextLine(FILE* pfile,char** pstr){
    //READ_CHUNK_MAX is the smallest value INT_MAX may have, so the count
    //handed to fgets always fits in an int.
    enum { INITIAL_CAPACITY = 16, READ_CHUNK_MAX = 32767 };
    static bool _eof = false;

    free(*pstr);
    *pstr = NULL;
    if(_eof){
        return false;
    }

    fpos_t position;
    bool can_rollback = (fgetpos(pfile,&position) == 0);

    size_t capacity = INITIAL_CAPACITY;
    size_t length = 0;//characters of the line collected so far
    char* line = calloc(capacity,sizeof(char));
    if(!line){
        fprintf(stderr,"Failed to allocate memory.[%d]\n",__LINE__);
        return false;
    }

    for(;;){
        size_t room = capacity - length;
        if(room > READ_CHUNK_MAX){
            room = READ_CHUNK_MAX;
        }
        if(!fgets(line + length,(int)room,pfile)){
            if(!feof(pfile)){
                fprintf(stderr,"Failed to read from file.[%d]\n",__LINE__);
                free(line);
                return false;
            }
            _eof = true;
            if(length == 0){
                //End of file with nothing read: there is no further line.
                free(line);
                return false;
            }
            break;
        }
        length += strlen(line + length);
        if(length > 0 && line[length - 1] == '\n'){
            break;//complete line
        }
        if(feof(pfile)){
            _eof = true;//last line, not newline-terminated
            break;
        }
        if(length + 1 >= capacity){
            //Buffer full in the middle of a line: grow it and keep reading
            //where we stopped instead of re-reading the line from its start.
            size_t grown = capacity * 2;
            char* bigger = realloc(line,grown);
            if(!bigger){
                fprintf(stderr,"Failed to re-allocate memory.Roll back.[%d]\n",__LINE__);
                if(can_rollback){
                    fsetpos(pfile,&position);
                }
                free(line);
                return false;
            }
            line = bigger;
            capacity = grown;
        }
    }

    //Strip the terminator: always the '\n', and the '\r' only when one is present.
    if(length > 0 && line[length - 1] == '\n'){
        line[--length] = '\0';
        if(length > 0 && line[length - 1] == '\r'){
            line[--length] = '\0';
        }
    }

    //Shrink to fit; if that fails the larger buffer is still valid.
    char* fitted = realloc(line,length + 1);
    *pstr = fitted ? fitted : line;
    return true;
}
memctr.h
#ifndef MEMCTR_H
#define MEMCTR_H
/*
 * memctr: a minimal registry of heap pointers that are released in one go.
 *
 * Callers register the ADDRESS of a pointer variable with _incr(); free_mem()
 * later frees whatever each registered variable points to and resets the
 * variable to NULL.
 *
 * NOTE: the objects and functions below are defined (not merely declared) in
 * this header, so it can be included from one translation unit only.
 */
#include <stdlib.h>
#include <stdio.h>

/* One registry entry. */
typedef struct MEMCTR{
    void** ptr;          /* address of the pointer variable to release */
    struct MEMCTR* next; /* next entry, NULL at the tail */
} MEMCTR;

/* Head of the registry list, in registration order. */
struct MEMCTR* MEMCTR_LIST = NULL;
/* Number of entries currently registered. */
unsigned long _MEM_LIST_SIZE = 0ul;

/*
 * Appends ptr to the registry.
 *
 * ptr: address of a pointer variable holding memory obtained from
 *      malloc/calloc/realloc (or NULL). The variable must stay alive until
 *      free_mem() has run.
 *
 * Returns the number of registered entries after the insertion, or 0 when
 * ptr is NULL or the entry cannot be allocated (nothing is registered then).
 */
unsigned long _incr(void** ptr){
    if(ptr == NULL){
        return 0;
    }
    MEMCTR* node = calloc(1,sizeof *node);
    if(node == NULL){
        return 0;
    }
    node->ptr = ptr;

    /* Walk to the tail link so entries are released in registration order. */
    MEMCTR** link = &MEMCTR_LIST;
    while(*link){
        link = &(*link)->next;
    }
    *link = node;
    return ++_MEM_LIST_SIZE;
}

/*
 * Frees the memory behind every registered pointer, sets each registered
 * pointer variable to NULL and empties the registry.
 */
void free_mem(void){
    MEMCTR* node = MEMCTR_LIST;
    while(node){
        MEMCTR* next = node->next;
        free(*(node->ptr));
        *(node->ptr) = NULL;
        free(node);
        node = next;
    }
    MEMCTR_LIST = NULL;
    _MEM_LIST_SIZE = 0;
}
#endif