C语言的正则表达式具有效率高,编程容易的特点,其精华在于写出一个好的正则表达式规则。
示例如下。规则可以看我的其他博文。
调试下面的示例时可以发现一个问题:在字符串末尾置零时如果下标写错,就会发生内存错误。这并不是C语言内存操作的"奥秘",而是写入位置超出了 malloc 申请的缓冲区范围。
#include <sys/types.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Demo: split a buffer of concatenated AIS "!AIVDM" sentences into individual
 * messages with a POSIX extended regular expression and print each of them.
 *
 * The pattern matches from "!AIVDM," up to and including the '*' that
 * introduces the checksum; the two checksum characters that follow the match
 * are appended to every extracted message.
 *
 * Returns 0 on success, 1 if the pattern cannot be compiled or memory runs out.
 */
int main(int argc, char ** argv)
{
    /* Number of checksum characters that follow the '*' ending each match. */
    enum { CHECKSUM_LEN = 2 };

    /* Same runtime pattern as before, written without the invalid
     * backslash escapes in the C string literal. */
    const char *pRegexStr = "!AIVDM,.{1,72}[*]";
    const char *pText =
        "!AIVDM,2,1,3,A,577K2:02;M1AI9HkP01DpUHE9<4j0lfo;33P000`40=664000422isC@,0*2F"
        "!AIVDM,2,2,3,A,@h0000000000000,2*0F"
        "!AIVDM,1,1,,B,9002=mQq1oIJvt6;2eUn>Sh0040<,0*5D"
        "!AIVDM,1,1,,A,>5?Per18=HB1U:1@E=B0m<L,2*51"
        "!AIVDM,1,1,,B,C5N3SRP0IFJKmE4:v6pBGw`@62PaLELTBJ:V00000000S0D:R220,0*3A"
        "!AIVDM,2,1,5,B,E1mg=5J1T4W0h97aRh6ba84<h2d;W:Te=eLvH50```q,0*46"
        "!AIVDM,2,2,5,B,:D44QDlp0C1DU00,2*36"
        "!AIVDM,1,1,,B,15Mq4J0P01EREODRv4@74gv00HRq,0*88"
        "!AIVDM,1,1,,A,;4eG>3iuaFP2:r3OiBH7;8i00000,0*65"
        "!AIVDM,1,1,,A,B5N3SRP0FFJFC`4:CQDFKwiP200>,0*75"
        "!AIVDM,1,1,,B,H5NHcTP<51@4TrM>10584@U<D000,2*77 ";
    const char *pTextEnd = pText + strlen(pText);
    regex_t oRegex;
    regmatch_t pmatch;
    char szErrMsg[1024] = {0};
    int nErrCode;

    (void)argc;
    (void)argv;

    nErrCode = regcomp(&oRegex, pRegexStr, REG_EXTENDED);
    if (nErrCode != 0)
    {
        /* Only report an error when compilation actually failed. */
        regerror(nErrCode, &oRegex, szErrMsg, sizeof(szErrMsg));
        printf("ErrMsg: %s\n", szErrMsg);
        return 1;
    }

    while (pText < pTextEnd && regexec(&oRegex, pText, 1, &pmatch, 0) == 0)
    {
        size_t remaining = (size_t)(pTextEnd - pText);
        size_t start = (size_t)pmatch.rm_so;
        size_t end = (size_t)pmatch.rm_eo + CHECKSUM_LEN;
        size_t len;
        char *msg;

        /* Never read past the terminating NUL of the source text. */
        if (end > remaining)
        {
            end = remaining;
        }
        len = end - start;

        /* One extra byte for the terminator, which belongs at index len. */
        msg = (char *)malloc(len + 1);
        if (msg == NULL)
        {
            printf("malloc error \n");
            regfree(&oRegex);
            return 1;
        }
        memcpy(msg, pText + start, len);
        msg[len] = '\0';
        printf("%s \n", msg);
        free(msg);

        /* Resume right after the checksum so an adjacent message keeps its '!'. */
        pText += end;
    }

    regfree(&oRegex);
    return 0;
}
将匹配结果封装到一个链表数据结构中,以增强程序的模块化、降低耦合性。
数据结构的示例可以看博文:http://blog.csdn.net/luopeiyuan1990/article/details/8255632
/*
 * Append every substring of pText_ that matches Regex_rule to the linked list
 * headed by pret_regex, then print the list.
 *
 * Each stored string is the match plus the two characters that follow it,
 * truncated at the end of the text, and is NUL-terminated. The strings are
 * allocated with malloc() and attached to the new nodes' Data fields.
 *
 * pret_regex  head node of an already initialised list
 * Regex_rule  POSIX extended regular expression
 * pText_      NUL-terminated text to scan (not modified)
 */
void Regex_MSG(pRet_Regex pret_regex,char *Regex_rule,char *pText_)
{
    regex_t oRegex;
    regmatch_t pmatch;
    char szErrMsg[1024] = {0};
    char *pText = pText_;
    char *pTextEnd;
    int nErrCode;

    if (pret_regex == NULL || Regex_rule == NULL || pText_ == NULL)
    {
        return;
    }

    nErrCode = regcomp(&oRegex, Regex_rule, REG_EXTENDED);
    if (nErrCode != 0)
    {
        /* A failed regcomp leaves nothing to free; just report the error. */
        regerror(nErrCode, &oRegex, szErrMsg, sizeof(szErrMsg));
        printf("ErrMsg: %s\n", szErrMsg);
        return;
    }

    pTextEnd = pText + strlen(pText);
    while (pText < pTextEnd && regexec(&oRegex, pText, 1, &pmatch, 0) == 0)
    {
        size_t remaining = (size_t)(pTextEnd - pText);
        size_t start = (size_t)pmatch.rm_so;
        size_t end = (size_t)pmatch.rm_eo + 2;
        size_t len;
        char *copy;
        pRet_Regex pNode;

        /* Never read past the terminating NUL of the source text. */
        if (end > remaining)
        {
            end = remaining;
        }
        len = end - start;

        /* Build the string first so a node is only added when there is data. */
        copy = (char *)malloc(len + 1);
        if (copy == NULL)
        {
            printf("malloc error \n");
            break;
        }
        memcpy(copy, pText + start, len);
        copy[len] = '\0';   /* terminator goes at index len, inside the buffer */

        pNode = Before_Null_Node(pret_regex);
        AddNode(pNode, pNode->next);
        if (pNode->next == NULL)
        {
            /* No node was appended; do not leak the copy. */
            free(copy);
            break;
        }
        pNode->next->Data = copy;

        /* Continue right after the copied span. */
        pText += end;
    }

    regfree(&oRegex);
    PrintList(pret_regex);
}
最后分拆为两个文件:
#include <sys/types.h>
#include "Eregex.hpp"
/*
 * Demo driver for Regex_MSG(): runs the "return the first match" mode on a
 * buffer of concatenated AIS "!AIVDM" sentences.
 *
 * A list head is also created to show how a result list is initialised; pass
 * it instead of NULL as the first argument to collect every match in the list.
 *
 * Returns 0 on success, 1 if the list head cannot be allocated.
 */
int main(int argc, char ** argv)
{
    char * pText_ =
        (char*)"!AIVDM,2,1,3,A,577K2:02;M1AI9HkP01DpUHE9<4j0lfo;33P000`40=664000422isC@,0*2F"
        "!AIVDM,2,2,3,A,@h0000000000000,2*0F"
        "!AIVDM,1,1,,B,9002=mQq1oIJvt6;2eUn>Sh0040<,0*5D"
        "!AIVDM,1,1,,A,>5?Per18=HB1U:1@E=B0m<L,2*51"
        "!AIVDM,1,1,,B,C5N3SRP0IFJKmE4:v6pBGw`@62PaLELTBJ:V00000000S0D:R220,0*3A"
        "!AIVDM,2,1,5,B,E1mg=5J1T4W0h97aRh6ba84<h2d;W:Te=eLvH50```q,0*46"
        "!AIVDM,2,2,5,B,:D44QDlp0C1DU00,2*36"
        "!AIVDM,1,1,,B,15Mq4J0P01EREODRv4@74gv00HRq,0*88"
        "!AIVDM,1,1,,A,;4eG>3iuaFP2:r3OiBH7;8i00000,0*65"
        "!AIVDM,1,1,,A,B5N3SRP0FFJFC`4:CQDFKwiP200>,0*75"
        "!AIVDM,1,1,,B,H5NHcTP<51@4TrM>10584@U<D000,2*77 ";
    pRet_Regex pret_regex;
    char *pFirst;

    (void)argc;
    (void)argv;

    pret_regex = (pRet_Regex)malloc(sizeof(Ret_Regex));
    if (pret_regex == NULL)
    {
        return 1;
    }
    /* Initialise every field so the head never carries indeterminate values. */
    pret_regex->Data = NULL;
    pret_regex->length = 0;
    pret_regex->next = NULL;

    /* Same runtime pattern as before, written without the invalid backslash
     * escapes in the C string literal. An alternative, bounded rule is
     * "!AIVDM,.{1,72}[*]". */
    pFirst = Regex_MSG(NULL,(char*)"!AIVDM,.*[*]",pText_);

    /* The returned string is heap-allocated and owned by the caller. */
    free(pFirst);
    free(pret_regex);
    return 0;
}
另外一个是hpp文件,但是使用的时候最好写到c文件中
#include <stdio.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Node of the singly linked list that holds regex match results.
 *
 * The self-reference is spelled "struct Ret_Regex *" because the typedef name
 * does not exist yet inside the struct body in C; this form is valid in both
 * C and C++.
 */
typedef struct Ret_Regex
{
    char *Data;               /* payload string of this node */
    unsigned char length;     /* length slot for the payload; not set by the list helpers */
    struct Ret_Regex *next;   /* following node, or NULL at the end of the list */
}Ret_Regex,*pRet_Regex ;
/* Generic names for the list helpers: Linkpointer expands to the node pointer
 * type and LinkEntity to the node type itself. */
#define Linkpointer pRet_Regex
#define LinkEntity Ret_Regex
/*
 * Hook called by CreateNode() for a freshly allocated node.
 * It is intentionally a no-op at the moment: no field of Node is touched.
 */
void Entity_entity(Linkpointer Node)
{
    (void)Node;
}
/*
 * Print one node's Data string followed by " \n".
 *
 * A NULL node is ignored and a NULL Data pointer is printed as "(null)",
 * because passing NULL to "%s" is undefined behaviour.
 *
 * NOTE(review): a head node whose Data was never initialised is not NULL but
 * indeterminate, so it can still print garbage - the "messy" first line the
 * original comment mentions. Initialise Data when creating the head.
 */
void Entity_print(Linkpointer Node)
{
    if (Node == NULL)
    {
        return;
    }
    printf("%s \n", Node->Data != NULL ? Node->Data : "(null)");
}
/*
 * Walk the list from phead to its end and print every node, the head
 * included, with Entity_print().
 */
void PrintList(Linkpointer phead)
{
    Linkpointer cur;

    for (cur = phead; cur != NULL; cur = cur->next)
    {
        Entity_print(cur);
    }
}
/*
 * Count the nodes reachable from phead, the head itself included.
 * A list that consists of the head only therefore has length 1, and a NULL
 * head yields 0.
 */
int GetLength(Linkpointer phead)
{
    int count = 0;
    Linkpointer cur;

    for (cur = phead; cur != NULL; cur = cur->next)
    {
        ++count;
    }
    return count;
}
/*
 * Return the last node of the list, i.e. the node whose next pointer is NULL.
 * Returns NULL when phead itself is NULL instead of dereferencing it.
 */
Linkpointer Before_Null_Node(Linkpointer phead)
{
    Linkpointer p = phead ;

    if (p == NULL)
    {
        return NULL ;
    }
    while (p->next != NULL)
    {
        p = p->next ;
    }
    return p ;
}
/*
 * Allocate a new, unlinked list node.
 *
 * Every field is initialised (Data = NULL, length = 0, next = NULL) before
 * the Entity_entity() hook runs, so the hook may override them.
 *
 * Returns the new node, or NULL after printing a message when malloc() fails.
 * The caller owns the node and releases it with free().
 */
Linkpointer CreateNode()
{
    Linkpointer pNode = (Linkpointer)malloc(sizeof(LinkEntity));

    if (pNode == NULL)
    {
        printf("Node malloc error \n");
        /* Stop here: the node must not be dereferenced after a failed malloc. */
        return NULL ;
    }
    pNode->Data = NULL ;
    pNode->length = 0 ;
    pNode->next = NULL ;
    Entity_entity(pNode);
    return pNode ;
}
/*
 * Create a new node and insert it between pNode and pNode2, so that
 * pNode -> new -> pNode2.
 *
 * The list is left untouched when pNode is NULL or when no node could be
 * created; callers can detect that case because pNode->next is unchanged.
 */
void AddNode(Linkpointer pNode,Linkpointer pNode2)
{
    Linkpointer pNew ;

    if (pNode == NULL)
    {
        return ;
    }
    pNew = CreateNode();
    if (pNew == NULL)
    {
        return ;
    }
    /* Link the new node forward first so the chain is never broken. */
    pNew->next = pNode2 ;
    pNode->next = pNew ;
}
/*
 * Unlink and free the node that follows pNode.
 *
 * Only the node itself is released; its Data pointer is not freed here.
 * Returns 1 when a node was removed, or 0 (after printing a notice) when
 * pNode has no successor.
 */
bool DeleteNode(Linkpointer pNode)
{
    Linkpointer victim = pNode->next ;

    if (victim != NULL)
    {
        pNode->next = victim->next ;
        victim->next = NULL ;
        free(victim);
        return 1 ;
    }

    printf(" No Node to Delete ");
    return 0 ;
}
/**************************************************************
 * Regex_MSG - extract substrings of pText_ that match Regex_rule.
 *
 * Two modes, selected by pret_regex_:
 * 1. pret_regex_ != NULL: every match is appended to the linked list
 *    headed by pret_regex_ (the list must be initialised first), the
 *    list is printed, and NULL is returned.
 * 2. pret_regex_ == NULL: the first match is printed and returned as a
 *    malloc()ed string that the caller must free(); NULL if no match.
 *
 * Each extracted string is the match plus the two characters that
 * follow it (truncated at the end of the text) and is NUL-terminated.
 * pText_ itself is neither modified nor freed.
 * On a pattern compilation error the message is printed and NULL is
 * returned.
 ***************************************************************/

/* Copy the match plus two trailing characters out of pText into a new
 * NUL-terminated string; *pConsumed receives the offset just past the copy.
 * Returns NULL when malloc() fails. */
static char *Regex_MSG_DupMatch(const char *pText, const char *pTextEnd,
                                const regmatch_t *pMatch, size_t *pConsumed)
{
    size_t remaining = (size_t)(pTextEnd - pText);
    size_t start = (size_t)pMatch->rm_so;
    size_t end = (size_t)pMatch->rm_eo + 2;
    size_t len;
    char *copy;

    /* Never read past the terminating NUL of the source text. */
    if (end > remaining)
    {
        end = remaining;
    }
    len = end - start;

    copy = (char *)malloc(len + 1);
    if (copy == NULL)
    {
        return NULL;
    }
    memcpy(copy, pText + start, len);
    copy[len] = '\0';   /* terminator goes at index len, inside the buffer */
    *pConsumed = end;
    return copy;
}

char* Regex_MSG(pRet_Regex pret_regex_,char *Regex_rule,char *pText_)
{
    regex_t oRegex;
    regmatch_t pmatch;
    char szErrMsg[1024] = {0};
    char *pText = pText_;
    char *pTextEnd;
    char *OutChar = NULL;
    int nErrCode;

    if (Regex_rule == NULL || pText_ == NULL)
    {
        return NULL;
    }

    nErrCode = regcomp(&oRegex, Regex_rule, REG_EXTENDED);
    if (nErrCode != 0)
    {
        /* A failed regcomp leaves nothing to free; just report the error. */
        regerror(nErrCode, &oRegex, szErrMsg, sizeof(szErrMsg));
        printf("ErrMsg: %s\n", szErrMsg);
        return NULL;
    }

    pTextEnd = pText + strlen(pText);
    while (pText < pTextEnd && regexec(&oRegex, pText, 1, &pmatch, 0) == 0)
    {
        size_t consumed = 0;
        char *copy = Regex_MSG_DupMatch(pText, pTextEnd, &pmatch, &consumed);
        pRet_Regex pNode;

        if (copy == NULL)
        {
            printf("malloc error \n");
            break;
        }

        if (pret_regex_ == NULL)
        {
            /* Mode 2: hand the first match to the caller and stop. */
            OutChar = copy;
            printf("Out char is %s \n", OutChar);
            break;
        }

        /* Mode 1: append a node carrying the copy to the end of the list. */
        pNode = Before_Null_Node(pret_regex_);
        AddNode(pNode, pNode->next);
        if (pNode->next == NULL)
        {
            /* No node was appended; do not leak the copy. */
            free(copy);
            break;
        }
        pNode->next->Data = copy;

        /* Continue right after the copied span. */
        pText += consumed;
    }

    /* Single release point for the compiled pattern on every path. */
    regfree(&oRegex);

    if (pret_regex_ != NULL)
    {
        PrintList(pret_regex_);
    }
    return OutChar;
}
bug修改如下:
/*
 * Regex_MSG - extract substrings of pText_ that match Regex_rule, with
 * adjustable margins around each match.
 *
 * For a match spanning [rm_so, rm_eo) the extracted span is
 * [rm_so + FrontShifting, rm_eo + BackShifting), clamped to the text.
 * Every extracted string is malloc()ed and NUL-terminated, and scanning
 * resumes at the end of the extracted span.
 *
 * Two modes, selected by pret_regex_:
 * 1. pret_regex_ != NULL: every match is appended to the list headed by
 *    pret_regex_, the list is printed, and NULL is returned.
 * 2. pret_regex_ == NULL: every match is printed and the last one is
 *    returned (caller frees it; NULL if there was no match). If
 *    pAisRawData_ is not NULL, its Data is replaced by a copy of the text
 *    left over after the last match.
 *
 * NOTE(review): the original advanced by rm_eo + FrontShifting in mode 2 but
 * by rm_eo + BackShifting in mode 1; both modes now use BackShifting, which
 * matches the copied span - confirm this is the intended behaviour.
 * NOTE(review): pAisRawData_->Data is assumed to be a heap-allocated string
 * owned by the structure - confirm against its declaration.
 */
char* Regex_MSG(pRet_Regex pret_regex_,char *Regex_rule,char *pText_,pTAisRawData pAisRawData_,int FrontShifting,int BackShifting)
{
    regex_t oRegex;
    regmatch_t pmatch;
    char szErrMsg[1024] = {0};
    char *pText = pText_;
    char *pTextEnd;
    char *OutChar = NULL;   /* stays NULL in list mode or without a match */
    int nErrCode;

    if (Regex_rule == NULL || pText_ == NULL)
    {
        return NULL;
    }

    nErrCode = regcomp(&oRegex, Regex_rule, REG_EXTENDED);
    if (nErrCode != 0)
    {
        /* A failed regcomp leaves nothing to free; just report the error. */
        regerror(nErrCode, &oRegex, szErrMsg, sizeof(szErrMsg));
        printf("ErrMsg: %s\n", szErrMsg);
        return NULL;
    }

    pTextEnd = pText + strlen(pText);
    while (pText < pTextEnd && regexec(&oRegex, pText, 1, &pmatch, 0) == 0)
    {
        long remaining = (long)(pTextEnd - pText);
        long start = (long)pmatch.rm_so + FrontShifting;
        long end = (long)pmatch.rm_eo + BackShifting;
        size_t len;
        char *copy;

        /* Keep the span inside the text and non-negative in length. */
        if (start < 0)
        {
            start = 0;
        }
        if (start > remaining)
        {
            start = remaining;
        }
        if (end > remaining)
        {
            end = remaining;
        }
        if (end < start)
        {
            end = start;
        }
        len = (size_t)(end - start);

        copy = (char *)malloc(len + 1);
        if (copy == NULL)
        {
            printf("malloc error \n");
            break;
        }
        memcpy(copy, pText + start, len);
        copy[len] = '\0';   /* the copy is printed with %s, so terminate it */

        if (pret_regex_ != NULL)
        {
            pRet_Regex pNode = Ret_Regex_Before_Null_Node(pret_regex_);

            Ret_Regex_AddNode(pNode, pNode->next);
            if (pNode->next == NULL)
            {
                /* No node was appended; do not leak the copy. */
                free(copy);
                break;
            }
            pNode->next->Data = copy;
        }
        else
        {
            printf("Out char is %s \n", copy);
            /* Only the most recent match is returned; release the previous one. */
            free(OutChar);
            OutChar = copy;
        }

        if (end == 0)
        {
            /* No forward progress is possible; avoid looping forever. */
            break;
        }
        pText += end;
    }

    if (pret_regex_ != NULL)
    {
        Ret_Regex_PrintList(pret_regex_);
    }
    else if (pAisRawData_ != NULL)
    {
        /* Duplicate before freeing: pText may point into the old Data buffer. */
        char *pRest = strdup(pText);

        if (pRest != NULL)
        {
            free(pAisRawData_->Data);
            pAisRawData_->Data = pRest;
        }
    }

    /* Single release point for the compiled pattern. */
    regfree(&oRegex);
    return OutChar;
}