C语言POSIX正则表达式示例


C语言的正则表达式具有效率高,编程容易的特点,其精华在于写出一个好的正则表达式规则。

示例如下。规则可以看我的其他博文。

调试下面的示例时可以发现一个问题:给结果字符串的尾部置零时,如果写入位置不在已分配的缓冲区之内,就会发生内存越界错误。置零的下标必须相对于新分配的缓冲区(而不是原文本中的偏移),并且分配时要多留一个字节存放 '\0'。

#include <sys/types.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Demo: scan a block of text with a POSIX extended regular expression and
 * print every hit on its own line.
 *
 * Each printed string is the regex match plus the two characters that
 * follow it, clipped at the end of the text.  Scanning resumes one
 * character after the printed string.
 *
 * Returns EXIT_SUCCESS when the pattern compiled, EXIT_FAILURE otherwise.
 */
int main(int argc, char ** argv)
{
    /*
     * Other patterns the author experimented with:
     *   !AIVDM,.{20,80},.[*]      ^(hisencyber)(.com|.com.cn)
     *   "!AIVDM,..*,0*"   "!AIVDM,.*,0*"   (REG_NOSUB was also tried)
     *
     * The original literal was written as "!AIVDM,\.{1,72}\[*]"; "\." and
     * "\[" are not valid C escape sequences.  Compilers that accept them
     * drop the backslash, so the pattern is spelled out here without them.
     */
    const char *pRegexStr = "!AIVDM,.{1,72}[*]";
    const char *pText =
	"!AIVDM,2,1,3,A,577K2:02;M1AI9HkP01DpUHE9<4j0lfo;33P000`40=664000422isC@,0*2F\
	!AIVDM,2,2,3,A,@h0000000000000,2*0F\
	!AIVDM,1,1,,B,9002=mQq1oIJvt6;2eUn>Sh0040<,0*5D\
	!AIVDM,1,1,,A,>5?Per18=HB1U:1@E=B0m<L,2*51\
	!AIVDM,1,1,,B,C5N3SRP0IFJKmE4:v6pBGw`@62PaLELTBJ:V00000000S0D:R220,0*3A\
	!AIVDM,2,1,5,B,E1mg=5J1T4W0h97aRh6ba84<h2d;W:Te=eLvH50```q,0*46\
	!AIVDM,2,2,5,B,:D44QDlp0C1DU00,2*36\
	!AIVDM,1,1,,B,15Mq4J0P01EREODRv4@74gv00HRq,0*88\
	!AIVDM,1,1,,A,;4eG>3iuaFP2:r3OiBH7;8i00000,0*65\
	!AIVDM,1,1,,A,B5N3SRP0FFJFC`4:CQDFKwiP200>,0*75\
	!AIVDM,1,1,,B,H5NHcTP<51@4TrM>10584@U<D000,2*77 ";

    regex_t oRegex;
    regmatch_t pmatch;
    char szErrMsg[1024] = {0};
    int nErrCode;

    (void)argc;
    (void)argv;

    nErrCode = regcomp(&oRegex, pRegexStr, REG_EXTENDED);
    if (nErrCode == 0)
    {
        while (*pText != '\0' && regexec(&oRegex, pText, 1, &pmatch, 0) == 0)
        {
            const char *pStart = pText + pmatch.rm_so;
            /* match length plus the two trailing characters */
            size_t want = (size_t)(pmatch.rm_eo - pmatch.rm_so) + 2;
            /* never read beyond the terminator of the source text */
            size_t avail = strlen(pStart);
            size_t len = (want < avail) ? want : avail;
            char *msg = (char *)malloc(len + 1);   /* +1 for the terminator */

            if (msg == NULL)
            {
                break;
            }
            memcpy(msg, pStart, len);
            msg[len] = '\0';   /* index is relative to msg, not to pText */
            printf("%s \n", msg);
            free(msg);

            /* skip the single separator that follows, if there is one */
            pText = pStart + len;
            if (*pText != '\0')
            {
                pText++;
            }
        }
    }

    /* Query the message while the regex object is still valid. */
    regerror(nErrCode, &oRegex, szErrMsg, sizeof(szErrMsg));
    printf("ErrMsg: %s\n", szErrMsg);

    if (nErrCode == 0)
    {
        regfree(&oRegex);   /* exactly once, and only if regcomp succeeded */
        return EXIT_SUCCESS;
    }
    return EXIT_FAILURE;
}

将匹配结果封装进一个链表数据结构,降低程序各部分之间的耦合性(提高模块化程度)。

数据结构的示例可以看博文:http://blog.csdn.net/luopeiyuan1990/article/details/8255632

/*
 * Append every regex hit found in pText_ to the list headed by pret_regex,
 * then print the list with PrintList.
 *
 * pret_regex  existing list head; new nodes are appended after the last node.
 * Regex_rule  POSIX extended regular expression.
 * pText_      NUL-terminated text to scan; it is not modified.
 *
 * Each stored string is the regex match plus the two characters that
 * follow it, clipped at the end of the text, and is heap-allocated
 * (Data is owned by the node).  Scanning resumes one character after the
 * stored string.  A line "ErrMsg: ..." with the regerror() text for the
 * regcomp() result is always printed.  Does nothing if any argument is NULL.
 */
void Regex_MSG(pRet_Regex pret_regex,char *Regex_rule,char *pText_)
{
	const char *pText = pText_;
	regex_t oRegex;
	regmatch_t pmatch;
	char szErrMsg[1024] = {0};
	int nErrCode;

	if (pret_regex == NULL || Regex_rule == NULL || pText_ == NULL)
	{
		return;
	}

	nErrCode = regcomp(&oRegex, Regex_rule, REG_EXTENDED);
	if (nErrCode == 0)
	{
		/* Locate the tail once; afterwards it is tracked as nodes are added. */
		pRet_Regex pTail = Before_Null_Node(pret_regex);

		while (*pText != '\0' && regexec(&oRegex, pText, 1, &pmatch, 0) == 0)
		{
			const char *pStart = pText + pmatch.rm_so;
			/* match length plus the two trailing characters */
			size_t want = (size_t)(pmatch.rm_eo - pmatch.rm_so) + 2;
			/* never read beyond the terminator of the source text */
			size_t avail = strlen(pStart);
			size_t len = (want < avail) ? want : avail;
			char *pCopy = (char *)malloc(len + 1);	/* +1 for the terminator */

			if (pCopy == NULL)
			{
				break;
			}
			memcpy(pCopy, pStart, len);
			pCopy[len] = '\0';	/* index relative to the copy, not to pText */

			AddNode(pTail, pTail->next);
			if (pTail->next == NULL)
			{
				/* no node was linked in; do not leak the copy */
				free(pCopy);
				break;
			}
			pTail = pTail->next;
			pTail->Data = pCopy;

			/* skip the single separator that follows, if there is one */
			pText = pStart + len;
			if (*pText != '\0')
			{
				pText++;
			}
		}
		PrintList(pret_regex);
	}

	/* Query the message while the regex object is still valid. */
	regerror(nErrCode, &oRegex, szErrMsg, sizeof(szErrMsg));
	printf("ErrMsg: %s\n", szErrMsg);

	if (nErrCode == 0)
	{
		regfree(&oRegex);	/* exactly once, and only if regcomp succeeded */
	}
}


最后分拆为两个文件:

#include <sys/types.h>

#include "Eregex.hpp"


/*
 * Demo driver for Regex_MSG: passes a NULL list so that Regex_MSG returns
 * (and prints) the first hit, then releases the returned string.
 */
int main(int argc, char ** argv)
{
	/*
	 * Other patterns the author experimented with:
	 *   !AIVDM,.{20,80},.[*]      ^(hisencyber)(.com|.com.cn)
	 *   "!AIVDM,..*,0*"   "!AIVDM,.*,0*"   "!AIVDM,.{1,72}[*]"
	 */
    char * pText_ =
	(char*)"!AIVDM,2,1,3,A,577K2:02;M1AI9HkP01DpUHE9<4j0lfo;33P000`40=664000422isC@,0*2F\
	!AIVDM,2,2,3,A,@h0000000000000,2*0F\
	!AIVDM,1,1,,B,9002=mQq1oIJvt6;2eUn>Sh0040<,0*5D\
	!AIVDM,1,1,,A,>5?Per18=HB1U:1@E=B0m<L,2*51\
	!AIVDM,1,1,,B,C5N3SRP0IFJKmE4:v6pBGw`@62PaLELTBJ:V00000000S0D:R220,0*3A\
	!AIVDM,2,1,5,B,E1mg=5J1T4W0h97aRh6ba84<h2d;W:Te=eLvH50```q,0*46\
	!AIVDM,2,2,5,B,:D44QDlp0C1DU00,2*36\
	!AIVDM,1,1,,B,15Mq4J0P01EREODRv4@74gv00HRq,0*88\
	!AIVDM,1,1,,A,;4eG>3iuaFP2:r3OiBH7;8i00000,0*65\
	!AIVDM,1,1,,A,B5N3SRP0FFJFC`4:CQDFKwiP200>,0*75\
	!AIVDM,1,1,,B,H5NHcTP<51@4TrM>10584@U<D000,2*77 ";
	char *pFirst;

	(void)argc;
	(void)argv;

	/*
	 * The original literal was "!AIVDM,\.*\[*]"; "\." and "\[" are not
	 * valid C escape sequences.  Compilers that accept them drop the
	 * backslash, so the pattern is written here without them.
	 */
	pFirst = Regex_MSG(NULL,(char*)"!AIVDM,.*[*]",pText_);

	/* The returned string is heap-allocated by Regex_MSG. */
	free(pFirst);
	return 0;
}

另外一个是 hpp 文件。由于其中直接包含了函数定义,实际使用时最好把这些实现写到 .c 文件中,头文件里只保留声明。

#include <stdio.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Node of a singly linked list of strings.
 *
 * The link member is spelled "struct Ret_Regex *" because the typedef name
 * Ret_Regex does not exist yet inside the struct body when compiled as C;
 * this spelling is valid in both C and C++.
 */
typedef struct Ret_Regex
{
	char *Data;			/* string carried by this node */
	unsigned char length;		/* length field; not set by the list helpers */
	struct Ret_Regex *next;		/* following node, NULL at the end of the list */
}Ret_Regex,*pRet_Regex ;

/* Generic aliases used by the list helpers. */
#define Linkpointer pRet_Regex
#define LinkEntity Ret_Regex


/*
 * Hook called by CreateNode for a freshly allocated node.
 * Currently does nothing.
 */
void Entity_entity(Linkpointer Node)
{
	(void)Node;	/* intentionally unused */
}
/*
 * Print the Data string of one node, followed by a space and a newline.
 * NOTE(review): the original remark "first is messy print" presumably
 * refers to a list head whose Data was never set - confirm with callers.
 */
void Entity_print(Linkpointer Node)
{
	const char *text = Node->Data;

	printf("%s \n", text);
}
/*
 * Print every node from phead to the end of the list, in order,
 * using Entity_print.  A NULL phead prints nothing.
 */
void PrintList(Linkpointer phead)
{
	Linkpointer cur;

	for (cur = phead; cur != NULL; cur = cur->next)
	{
		Entity_print(cur);
	}
}
/*
 * Count the nodes reachable from phead, the head itself included
 * (so a list consisting only of its head has length 1; NULL gives 0).
 */
int GetLength(Linkpointer phead)
{
	int count = 0;
	Linkpointer cur;

	for (cur = phead; cur != NULL; cur = cur->next)
	{
		count++;
	}
	return count;
}
/*
 * Return the last node of the list, i.e. the node whose next is NULL.
 * phead must not be NULL.
 */
Linkpointer Before_Null_Node(Linkpointer phead)
{
	Linkpointer last;

	for (last = phead; last->next != NULL; last = last->next)
	{
		/* keep walking until the tail is reached */
	}
	return last;
}
/*
 * Allocate a new, unlinked list node.
 *
 * Returns the node with Data == NULL, length == 0 and next == NULL, or
 * NULL (after printing a message) when the allocation fails.  The caller
 * owns the returned node.
 */
Linkpointer CreateNode(void)
{
	Linkpointer pNode = (Linkpointer)malloc(sizeof(LinkEntity));
	if(pNode==NULL)
	{
		printf("Node malloc error \n");
		return NULL;	/* do not touch the node after a failed allocation */
	}
	/* Start from a defined state so no garbage pointer is ever printed or freed. */
	pNode->Data = NULL;
	pNode->length = 0;
	Entity_entity(pNode);
	pNode->next = NULL ;
	return pNode ;
}
/*
 * Create a new node and link it between pNode and pNode2:
 *   pNode -> new node -> pNode2
 *
 * If pNode is NULL or no node could be created, the list is left
 * unchanged; callers can detect that by checking pNode->next.
 */
void AddNode(Linkpointer pNode,Linkpointer pNode2)
{
	Linkpointer pNew;

	if (pNode == NULL)
	{
		return;
	}

	pNew = CreateNode();
	if (pNew == NULL)
	{
		return;	/* keep the existing links intact on failure */
	}

	/* Wire the new node up completely before publishing it in the list. */
	pNew->next = pNode2 ;
	pNode->next = pNew ;
}
/*
 * Unlink and free the node that follows pNode.
 *
 * Returns 1 when a node was removed, 0 (after printing a message) when
 * pNode has no successor.  Only the node itself is freed; its Data
 * pointer is not released here.
 */
bool DeleteNode(Linkpointer pNode)
{
	Linkpointer victim = pNode->next ;

	if (victim != NULL)
	{
		/* bypass the victim, then detach and release it */
		pNode->next = victim->next ;
		victim->next = NULL ;
		free(victim);
		return 1 ;
	}

	printf(" No Node to Delete ");
	return 0 ;
}
/**************************************************************
 *  Regex_MSG - extract the parts of pText_ that match Regex_rule
 *  (a POSIX extended regular expression).
 *
 *  Every extracted string is the regex match plus the two
 *  characters that follow it, clipped at the end of the text.
 *  Scanning resumes one character after the extracted string.
 *
 *  Two modes
 *  1. pret_regex_ != NULL: every extracted string is appended to
 *     the list as a new node (the list head must already exist),
 *     the list is printed with PrintList, and NULL is returned.
 *     NOTE(review): PrintList starts at the head, so the head's
 *     Data must be a valid string - confirm with callers.
 *  2. pret_regex_ == NULL: the first extracted string is printed
 *     and returned; the caller must free() it.  NULL is returned
 *     when nothing matched.
 *
 *  pText_ is neither modified nor freed, and the caller's pointer
 *  is not advanced (only a local cursor moves).
 *
 *  "ErrMsg: ..." with the regerror() text for the regcomp()
 *  result is printed on every path that returns NULL after the
 *  argument check.  NULL arguments for Regex_rule or pText_
 *  return NULL immediately.
 ***************************************************************/

/* Copy one hit (match + 2 trailing chars, clipped to the text) into a new
 * NUL-terminated buffer and report where scanning should continue. */
static char *regex_msg_dup_match(const char *pText, const regmatch_t *pMatch, const char **ppNext)
{
	const char *pStart = pText + pMatch->rm_so;
	size_t want = (size_t)(pMatch->rm_eo - pMatch->rm_so) + 2;
	size_t avail = strlen(pStart);	/* never read past the terminator */
	size_t len = (want < avail) ? want : avail;
	char *pCopy = (char *)malloc(len + 1);	/* +1 for the terminator */

	if (pCopy == NULL)
	{
		return NULL;
	}
	memcpy(pCopy, pStart, len);
	pCopy[len] = '\0';	/* index relative to the copy, not to pText */

	/* skip the single separator that follows, if there is one */
	pStart += len;
	if (*pStart != '\0')
	{
		pStart++;
	}
	*ppNext = pStart;
	return pCopy;
}

char* Regex_MSG(pRet_Regex pret_regex_,char *Regex_rule,char *pText_)
{
	const char *pText = pText_ ;
	char *OutChar = NULL ;
	regex_t oRegex;
	regmatch_t pmatch;
	char szErrMsg[1024] = {0};
	int nErrCode;

	if (Regex_rule == NULL || pText_ == NULL)
	{
		return NULL ;
	}

	nErrCode = regcomp(&oRegex, Regex_rule, REG_EXTENDED);
	if (nErrCode == 0)
	{
		if (pret_regex_ != NULL)
		{
			/* Locate the tail once; afterwards it is tracked as nodes are added. */
			pRet_Regex pTail = Before_Null_Node(pret_regex_);

			while (*pText != '\0' && regexec(&oRegex, pText, 1, &pmatch, 0) == 0)
			{
				char *pCopy = regex_msg_dup_match(pText, &pmatch, &pText);
				if (pCopy == NULL)
				{
					break;
				}
				AddNode(pTail, pTail->next);
				if (pTail->next == NULL)
				{
					/* no node was linked in; do not leak the copy */
					free(pCopy);
					break;
				}
				pTail = pTail->next;
				pTail->Data = pCopy;
			}
			PrintList(pret_regex_);
		}
		else if (*pText != '\0' && regexec(&oRegex, pText, 1, &pmatch, 0) == 0)
		{
			OutChar = regex_msg_dup_match(pText, &pmatch, &pText);
			if (OutChar != NULL)
			{
				printf("Out char is %s \n",OutChar);
			}
		}
	}

	if (OutChar == NULL)
	{
		/* Query the message while the regex object is still valid. */
		regerror(nErrCode, &oRegex, szErrMsg, sizeof(szErrMsg));
		printf("ErrMsg: %s\n", szErrMsg);
	}

	if (nErrCode == 0)
	{
		regfree(&oRegex);	/* exactly once, on every path where regcomp succeeded */
	}
	return OutChar ;
}



bug修改如下:


/*
 * Copy len bytes of pText starting at offset start into a new
 * NUL-terminated buffer.  start and len are clamped so that the copy
 * stays inside the textLen bytes of pText.  Returns NULL on allocation
 * failure.
 */
static char *regex_msg_dup_span(const char *pText, size_t textLen, long start, long len)
{
    char *pCopy;

    if (start < 0)
    {
        start = 0;
    }
    if ((size_t)start > textLen)
    {
        start = (long)textLen;
    }
    if (len < 0)
    {
        len = 0;
    }
    if ((size_t)len > textLen - (size_t)start)
    {
        len = (long)(textLen - (size_t)start);
    }

    pCopy = (char *)malloc((size_t)len + 1);    /* +1 for the terminator */
    if (pCopy == NULL)
    {
        return NULL;
    }
    memcpy(pCopy, pText + start, (size_t)len);
    pCopy[len] = '\0';
    return pCopy;
}

/*
 * Regex_MSG - extract the parts of pText_ that match Regex_rule (a POSIX
 * extended regular expression), with adjustable offsets.
 *
 * For every match, relative to the current scan position:
 *   copied span starts at  rm_so + FrontShifting
 *   copied span length is  (rm_eo - rm_so) + BackShifting
 *   scanning advances by   rm_eo + BackShifting   (list mode)
 *                          rm_eo + FrontShifting  (return mode)
 * NOTE(review): the two modes advance by different amounts; this is kept
 * from the original code - confirm it is intended.
 * All offsets are clamped to the remaining text, and every copy is
 * NUL-terminated.  Scanning stops if the advance would not move forward.
 *
 * List mode (pret_regex_ != NULL): each copy becomes the Data of a node
 * appended to the list, the list is printed with Ret_Regex_PrintList, and
 * NULL is returned.
 *
 * Return mode (pret_regex_ == NULL): each copy is printed; only the copy
 * for the last match is returned (earlier ones are freed) and the caller
 * must free() it.  NULL when nothing matched.  If pAisRawData_ is not
 * NULL, its Data is replaced by a strdup() of the unscanned remainder of
 * the text and the previous Data is freed.
 *
 * "ErrMsg: ..." with the regerror() text for the regcomp() result is
 * printed unless Regex_rule or pText_ is NULL, in which case NULL is
 * returned immediately.  pText_ is neither modified nor freed.
 */
char* Regex_MSG(pRet_Regex pret_regex_,char *Regex_rule,char *pText_,pTAisRawData pAisRawData_,int FrontShifting,int BackShifting)
{
    pRet_Regex pret_regex = pret_regex_ ;
    pTAisRawData p = pAisRawData_ ;
    const char *pText = pText_ ;
    char *OutChar = NULL ;    /* defined result on every path */

    regex_t oRegex;
    regmatch_t pmatch;

    char szErrMsg[1024] = {0};
    int nErrCode;

    if (Regex_rule == NULL || pText_ == NULL)
    {
        return NULL ;
    }

    nErrCode = regcomp(&oRegex, Regex_rule, REG_EXTENDED);
    if (nErrCode == 0)
    {
        while (*pText != '\0' && regexec(&oRegex, pText, 1, &pmatch, 0) == 0)
        {
            size_t textLen = strlen(pText);
            long start = (long)pmatch.rm_so + FrontShifting;
            long len = (long)(pmatch.rm_eo - pmatch.rm_so) + BackShifting;
            long end = (long)pmatch.rm_eo
                     + ((pret_regex != NULL) ? BackShifting : FrontShifting);
            char *pCopy = regex_msg_dup_span(pText, textLen, start, len);

            if (pCopy == NULL)
            {
                break;
            }

            if (pret_regex != NULL)
            {
                pRet_Regex pNode = Ret_Regex_Before_Null_Node(pret_regex);

                Ret_Regex_AddNode(pNode,pNode->next);
                if (pNode->next == NULL)
                {
                    /* presumably the helper could not add a node; do not leak the copy */
                    free(pCopy);
                    break;
                }
                pNode->next->Data = pCopy;
            }
            else
            {
                free(OutChar);    /* only the most recent hit is returned */
                OutChar = pCopy;
                printf("Out char is %s \n",OutChar);
            }

            if (end <= 0)
            {
                break;    /* the shifts would stall or rewind the scan */
            }
            if ((size_t)end > textLen)
            {
                end = (long)textLen;    /* never step past the terminator */
            }
            pText += end;
        }

        if (pret_regex != NULL)
        {
            Ret_Regex_PrintList(pret_regex);
        }
        else if (p != NULL)
        {
            /* Duplicate first: pText may point into the buffer p->Data owns. */
            char *pRest = strdup(pText);

            free(p->Data);
            p->Data = pRest;
        }
    }

    /* Query the message while the regex object is still valid. */
    regerror(nErrCode, &oRegex, szErrMsg, sizeof(szErrMsg));
    printf("ErrMsg: %s\n", szErrMsg);

    if (nErrCode == 0)
    {
        regfree(&oRegex);    /* only a successfully compiled regex may be freed */
    }
    return OutChar ;
}

















评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值