正则表达式的匹配函数

正则表达式的匹配函数

功能:根据正则表达式匹配目标字符串,并返回匹配字符串的开始位置
    参数:
        arg1 -- 目标字符串
        arg2 -- 正则表达式
        arg3 -- 从目标字符串什么位置开始匹配
        arg4 -- 标识第几个匹配的内容(目标字符串中可能有几处可以匹配成功)
        
    要求:
        1. 输入参数为arg1,arg2时,arg3,arg4默认值为1
        2. 输入参数为arg1,arg2,arg3时,arg4默认为1
        3. 三种输入不同的参数在一个函数中完成

 

PG 有正则表达式的相关功能,可以参考PG的函数实现上述功能

PostgreSQL 正则表达式 常用函数:https://my.oschina.net/yonj1e/blog/879875

大致的实现思路

pg_proc.h 添加函数声明,myfuncs.c 实现函数,获取参数,根据参数 arg3 修改目标字符串,编译正则表达式,匹配正则表达式,输出匹配位置,释放,添加错误提示信息。

添加函数声明

src/include/catalog/pg_proc.h

/*
 * Catalog rows for regexp_match_str.
 *
 * Three rows (OIDs 6672, 6673, 6674) share the function name and the
 * implementing symbol regexp_match_str.  They differ in OID, in the argument
 * count (2, 3, 4) and in the argument type list ("25 25", "25 25 23",
 * "25 25 23 23"); every row lists 23 immediately before the argument types.
 *
 * NOTE(review): 25 and 23 are presumably the type OIDs of text and int4 --
 * confirm against pg_type.h.
 * NOTE(review): the "10" and the "t t" flag pair look copied from a
 * set-returning entry.  If those columns are prorows and proretset in the
 * target release, they should probably be "0" and "f", because the C function
 * returns a single int32 -- confirm against the pg_proc column order.
 */
DATA(insert OID = 6672 (  regexp_match_str PGNSP PGUID 12 1 10 0 0 f f f f t t i 2 0 23 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_match_str _null_ _null_ _null_ ));
DESCR("Regular expression matches the target string.");
DATA(insert OID = 6673 (  regexp_match_str PGNSP PGUID 12 1 10 0 0 f f f f t t i 3 0 23 "25 25 23" _null_ _null_ _null_ _null_ _null_  regexp_match_str _null_ _null_ _null_ ));
DESCR("Regular expression matches the target string.");
DATA(insert OID = 6674 (  regexp_match_str PGNSP PGUID 12 1 10 0 0 f f f f t t i 4 0 23 "25 25 23 23" _null_ _null_ _null_ _null_ _null_ regexp_match_str _null_ _null_ _null_ ));
DESCR("Regular expression matches the target string.");

实现函数

获取参数,返回结果

/*
 * PG_GETARG_IF_EXISTS
 *		Evaluate to PG_GETARG_<type>(n) when the call carries more than n
 *		arguments and argument n is not NULL; otherwise evaluate to defval.
 *
 * PG_ARGISNULL(n) is only evaluated when argument n was actually passed.
 */
#define PG_GETARG_IF_EXISTS(n, type, defval) \
	((PG_NARGS() <= (n) || PG_ARGISNULL(n)) ? (defval) : PG_GETARG_##type(n))

/* 
 * Regular expression matches the target string. 
 *
 * Skeleton version: it only fetches the arguments and returns 0; the
 * matching logic is added in later steps.
 *
 *	arg 0 (text) - target string
 *	arg 1 (text) - regular expression
 *	arg 2 (int4) - start position; 1 when not passed or NULL
 *	arg 3 (int4) - which match to report; 1 when not passed or NULL
 *
 * author:young
 */
Datum 
regexp_match_str (PG_FUNCTION_ARGS)
{
	text *orginal_str = PG_GETARG_TEXT_P(0);
	text *text_re = PG_GETARG_TEXT_P(1);
	/* Optional arguments: PG_GETARG_IF_EXISTS supplies the default of 1. */
	int32 beg = PG_GETARG_IF_EXISTS(2, INT32, 1);
	int32 num = PG_GETARG_IF_EXISTS(3, INT32, 1);

	/* Placeholder result until the matching code is filled in. */
	PG_RETURN_INT32(0);
}

 修改目标字符串,使匹配从 arg3 指定的位置开始;未传入 arg3 时默认值为 1

    /* Modify the target string, starting from the beg position */
	char *src;
	int len;
	text *orig_str;
	char dest[len];
	
	src = text_to_cstring(orginal_str);	
	len = strlen(src);
	strncpy(dest, src + beg - 1, len - beg + 1);
	//dest[len - beg + 1] = '\0';
	orig_str = cstring_to_text(dest);

 PG 中处理正则表达式常用函数及相关参数类型的声明

/*
 * the prototypes for exported functions
 */
extern int	pg_regcomp(regex_t *, const pg_wchar *, size_t, int, Oid);
extern int	pg_regexec(regex_t *, const pg_wchar *, size_t, size_t, rm_detail_t *, size_t, regmatch_t[], int);
extern void pg_regfree(regex_t *);

typedef unsigned int pg_wchar;

/*
 * Annotated signature excerpts (bodies omitted).
 * NOTE(review): these excerpts spell the string type "chr" while the
 * prototypes above use "pg_wchar" -- presumably chr is an alias for pg_wchar
 * inside the regex sources; confirm.
 */
int
pg_regcomp(regex_t *re,
		   const chr *string, /* regular-expression string */
		   size_t len, /* length of the regular-expression string */
		   int flags,
		   Oid collation)

int
pg_regexec(regex_t *re, /* regular expression already compiled by regcomp */
		   const chr *string, /* target string */
		   size_t len, /* length of the target string */
		   size_t search_start, /* position at which matching starts */
		   rm_detail_t *details, /* NULL */
		   size_t nmatch, /* length of the regmatch_t array */
		   regmatch_t pmatch[], /* array of regmatch_t structs receiving the positions of the matched text */
		   int flags)

 pg_regcomp()、pg_regexec() 在编译和匹配时使用的字符串类型是 pg_wchar,所以需要先对目标字符串和正则表达式进行类型转换(可参考 PG 内置的几个正则函数,如 regexp_matches() 等)

	/* Target string: source length, wide-character buffer, converted length. */
	int orig_len;
	pg_wchar *wide_str;
	int wide_len;

	/* Pattern: source length, wide-character buffer, converted length. */
	int text_re_len;
	pg_wchar *pattern;
	int pattern_len;

	/*
	 * convert string to pg_wchar form for matching
	 *
	 * The buffer holds orig_len + 1 pg_wchar elements.
	 * NOTE(review): presumably orig_len is a byte count, so it bounds the
	 * character count and the extra element leaves room for a terminator --
	 * confirm against pg_mb2wchar_with_len.
	 */
	orig_len = VARSIZE_ANY_EXHDR(orig_str);
	wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
	wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);

	/* Convert pattern string to wide characters (same sizing as above) */
	text_re_len = VARSIZE_ANY_EXHDR(text_re);
	pattern = (pg_wchar *) palloc(sizeof(pg_wchar) * (text_re_len + 1));
	pattern_len = pg_mb2wchar_with_len(VARDATA_ANY(text_re), pattern, text_re_len);

 编译,匹配,释放

    /*
     * Compile the pattern, walk the matches until the num-th one is found,
     * then release the compiled regex.
     */
    regex_t re;
	int cflags = REG_ADVANCED;
	//Oid collation = PG_GET_COLLATION();

	/* Match-position array; slot 0 is the only one read below. */
	regmatch_t *pmatch;
	int pmatch_len;
	bool use_subpatterns = true;

	int start_search = 0;		/* offset at which the next search begins */
	int prev_match_end = 0;		/* end offset of the most recent match */
	int count = 1;				/* ordinal of the match being examined */

    /* NOTE(review): the return code of pg_regcomp is not checked in this step. */
    pg_regcomp(&re, pattern, pattern_len, cflags, PG_GET_COLLATION());

    /* One slot for the whole match plus one per subexpression, if any. */
    if (use_subpatterns && re.re_nsub > 0)
	{
		pmatch_len = re.re_nsub + 1;
	}
	else
	{
		use_subpatterns = false;
		pmatch_len = 1;
	}

	pmatch = palloc(sizeof(regmatch_t) * pmatch_len);

    /* Each successful pg_regexec call yields the next match. */
    while (pg_regexec(&re, wide_str, wide_len, start_search, NULL, pmatch_len, pmatch, 0) == REG_OKAY)
	{
		if (num == count)
		{
			/*
			 * Report the match start as a 1-based position.
			 * NOTE(review): this return skips the pg_regfree call below.
			 */
			PG_RETURN_INT32(pmatch[0].rm_so + 1);
		}
		count++;
		prev_match_end = pmatch[0].rm_eo;

		/* Continue searching right after the match just seen. */
		start_search = prev_match_end;

		/*
		 * An empty match (start == end) would leave start_search unchanged,
		 * so step one position forward before searching again.
		 */
		if (pmatch[0].rm_so == pmatch[0].rm_eo)
		{
			start_search++;
		}

		/* Stop once the search offset has moved past the end of the string. */
		if (start_search > wide_len)
		{
			break;
		}
	}

    pg_regfree(&re);

 添加上参数错误提示,执行 regcomp 或者 regexec 产生错误的时候,regerror返回一个包含错误信息的字符串,完整的代码如下:

/* 
 * Regular expression matches the target string. 
 * author:young
 */
Datum 
regexp_match_str (PG_FUNCTION_ARGS)
{
	text *orginal_str = PG_GETARG_TEXT_P(0);
	text *text_re = PG_GETARG_TEXT_P(1);
	int32 beg = PG_GETARG_IF_EXISTS(2, INT32, 1);
	int32 num = PG_GETARG_IF_EXISTS(3, INT32, 1);

	/* Modify the target string, starting from the beg position */
	char *src;
	int len;
	text *orig_str;
	char dest[len];
	
	src = text_to_cstring(orginal_str);	
	len = strlen(src);
	strncpy(dest, src + beg - 1, len - beg + 1);
	//dest[len - beg + 1] = '\0';
	orig_str = cstring_to_text(dest);

	if (!((1 <= beg && beg <= len) && (1 <= num)))

	{
		ereport(ERROR,
			(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
			errmsg("Parameters> = 1, such as 1.2 ....")));
	}

	int orig_len;
	pg_wchar *wide_str;
	int wide_len;

	int text_re_len;
	pg_wchar *pattern;
	int pattern_len;

	regex_t re;
	int cflags = REG_ADVANCED;
	//Oid collation = PG_GET_COLLATION();
	regmatch_t *pmatch;
	int pmatch_len;
	bool use_subpatterns = true;
	int start_search = 0;
	int prev_match_end = 0;
	int count = 1;

	/* convert string to pg_wchar form for matching */
	orig_len = VARSIZE_ANY_EXHDR(orig_str);
	wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
	wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);

	/* Convert pattern string to wide characters */
	text_re_len = VARSIZE_ANY_EXHDR(text_re);
	pattern = (pg_wchar *) palloc(sizeof(pg_wchar) * (text_re_len + 1));
	pattern_len = pg_mb2wchar_with_len(VARDATA_ANY(text_re), pattern, text_re_len);

	int regcomp_result;
	char errMsg[100];
	regcomp_result = pg_regcomp(&re, pattern, pattern_len, cflags, PG_GET_COLLATION());
	
	if (regcomp_result != REG_OKAY)
	{
		pg_regerror(regcomp_result, &re, errMsg, sizeof(errMsg));
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
				 errmsg("invalid regular expression: %s", errMsg)));
	}

	if (use_subpatterns && re.re_nsub > 0)
	{
		pmatch_len = re.re_nsub + 1;
	}
	else
	{
		use_subpatterns = false;
		pmatch_len = 1;
	}

	pmatch = palloc(sizeof(regmatch_t) * pmatch_len);

	int regexec_result;
	regexec_result = pg_regexec(&re, wide_str, wide_len, start_search, NULL, pmatch_len, pmatch, 0);

	if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
	{
		pg_regerror(regexec_result, &re, errMsg, sizeof(errMsg));
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
				 errmsg("regular expression failed: %s", errMsg)));
	}

	bool match_result = false;
	while (pg_regexec(&re, wide_str, wide_len, start_search, NULL, pmatch_len, pmatch, 0) == REG_OKAY)
	{
		match_result = true;
		if (num == count)
		{
			PG_RETURN_INT32(pmatch[0].rm_so + 1);
		}
		count++;
		prev_match_end = pmatch[0].rm_eo;

		start_search = prev_match_end;

		if (pmatch[0].rm_so == pmatch[0].rm_eo)
		{
			start_search++;
		}

		if (start_search > wide_len)
		{
			break;
		}
	}

	if (count == 1 || match_result)
	{
		ereport(ERROR,
			(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
			errmsg("NO MATCH!")));
	}

	pg_regfree(&re);

	PG_RETURN_INT32(0);
}

测试:

postgres=# select regexp_match_str('young@yahoo.com young@highgo.com','(\w+@[\w.]+com)',1,2);
 regexp_match_str 
------------------
               17
(1 row)

postgres=# select regexp_match_str('young@yahoo.com young@highgo.com','(\w+@[\w.]+com)');
 regexp_match_str 
------------------
                1
(1 row)

postgres=# select regexp_match_str('young@yahoo.com young@highgo.com','(\w+@[\w.]+com)',1,2);
 regexp_match_str 
------------------
               17
(1 row)

postgres=# select regexp_match_str('young@yahoo.com young@highgo.com','(\w+@[\w.]+com)',1,3);
ERROR:  NO MATCH!
STATEMENT:  select regexp_match_str('young@yahoo.com young@highgo.com','(\w+@[\w.]+com)',1,3);
ERROR:  NO MATCH!
postgres=# 

 

转载于:https://my.oschina.net/yonj1e/blog/879874

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值