一、简介
C标准库以及C++11之前的C++标准库都不提供正则表达式支持(C++11起可以使用标准库<regex>),但在POSIX系统上可以借助<regex.h>提供的函数库来完成这一功能。正则表达式常用函数:编译正则表达式 regcomp()、匹配正则表达式 regexec()、释放正则表达式 regfree()。
二、详解
1、代码
regcomp.cpp:
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <regex.h>
#include <assert.h>
#include <string.h>
using namespace std;
/**
 * Finds the first match of a POSIX extended regular expression in a string.
 *
 * @param input   Text to search.
 * @param pattern POSIX extended regular expression; compiled with
 *                REG_EXTENDED | REG_NEWLINE.
 * @param out     Receives the matched substring. It is cleared first, so it is
 *                empty when nothing matches or when an error occurs.
 * @return The offset just past the end of the match on success;
 *         input.length() when there is no match;
 *         -1 if the pattern fails to compile;
 *         -2 if regexec() reports any other error.
 */
int find_first(std::string input, std::string pattern, std::string &out){
    regex_t reg;
    regmatch_t pm[1];
    out.clear();

    /* Compile the regular expression. */
    if (regcomp(&reg, pattern.c_str(), REG_EXTENDED|REG_NEWLINE) != 0){
        return -1;
    }

    int iret = regexec(&reg, input.c_str(), 1, pm, 0);
    if (iret == REG_NOMATCH){
        iret = static_cast<int>(input.length());
    }else if (iret != 0){
        /* Fall through to regfree() instead of returning early, so the
           compiled pattern is not leaked on this error path. */
        iret = -2;
    }else{
        out = input.substr(pm[0].rm_so, pm[0].rm_eo - pm[0].rm_so);
        iret = static_cast<int>(pm[0].rm_eo);
    }

    regfree(&reg);
    return iret;
}
/**
 * Finds the first match of a POSIX extended regular expression in a C string
 * and copies it into a caller-supplied buffer.
 *
 * @param buff    NUL-terminated text to search.
 * @param pattern POSIX extended regular expression; compiled with
 *                REG_EXTENDED | REG_NEWLINE (extended syntax, newline-aware).
 * @param outdata Destination for the matched text. The caller must provide
 *                room for the whole match plus a terminating NUL. It is left
 *                unmodified when nothing matches or an error occurs.
 * @return 0 when a match was found and copied;
 *         -1 if the pattern fails to compile or nothing matches (the no-match
 *         case also prints "no match!" to stdout);
 *         -2 if regexec() reports any other error.
 */
int find_first(char *buff, char *pattern, char *outdata){
    regex_t reg;
    regmatch_t pm[1];

    /* Compile the regular expression; regcomp() returns 0 on success. */
    if (regcomp(&reg, pattern, REG_EXTENDED|REG_NEWLINE) != 0){
        return -1;
    }

    int status = regexec(&reg, buff, 1, pm, 0);
    if (status == REG_NOMATCH){
        printf("no match!\n");
        status = -1;
    }
    else if (status != 0){
        /* Do not return here: the compiled pattern still has to be freed. */
        status = -2;
    }
    else{
        const size_t len = static_cast<size_t>(pm[0].rm_eo - pm[0].rm_so);
        memcpy(outdata, buff + pm[0].rm_so, len);
        /* Terminate at the end of the copied text. The terminator index is
           the match length, not the match's end offset inside buff. */
        outdata[len] = '\0';
    }

    regfree(&reg);
    return status;
}
/**
 * Collects every non-overlapping match of a POSIX extended regular expression
 * in a C string, scanning left to right.
 *
 * @param buff        NUL-terminated text to search.
 * @param pattern     POSIX extended regular expression; compiled with
 *                    REG_EXTENDED | REG_NEWLINE.
 * @param result      Array of 20-byte rows receiving the matches as
 *                    NUL-terminated strings. A match longer than 19 characters
 *                    is truncated to fit its row.
 * @param max_results Maximum number of rows to fill; a negative value (the
 *                    default) means no limit, in which case the caller must
 *                    supply enough rows for every match.
 * @return The number of matches stored, or -1 if the pattern fails to compile.
 */
int find_all(char *buff, char *pattern, char result[][20], int max_results = -1){
    regex_t reg;
    regmatch_t pm[1];

    /* Compile the regular expression; regcomp() returns 0 on success. */
    if (regcomp(&reg, pattern, REG_EXTENDED|REG_NEWLINE) != 0){
        return -1;
    }

    /* Usable characters per row, keeping one byte for the terminator. */
    const size_t max_len = sizeof(result[0]) - 1;
    const char *p = buff;
    int count = 0;
    int eflags = 0;

    while ((max_results < 0 || count < max_results) &&
           regexec(&reg, p, 1, pm, eflags) == 0){
        const size_t match_len = static_cast<size_t>(pm[0].rm_eo - pm[0].rm_so);
        const size_t copy_len = match_len < max_len ? match_len : max_len;
        memcpy(result[count], p + pm[0].rm_so, copy_len);
        result[count][copy_len] = '\0';
        ++count;

        p += pm[0].rm_eo;
        if (match_len == 0){
            /* A zero-length match does not consume input; step one character
               forward so the scan cannot loop forever on the same position. */
            if (*p == '\0') break;
            ++p;
        }
        if (*p == '\0') break;

        /* The resume point is only a real line start if it follows a newline;
           otherwise tell regexec() so that '^' does not match mid-line. */
        eflags = (p[-1] == '\n') ? 0 : REG_NOTBOL;
    }

    regfree(&reg);
    return count;
}
/**
 * Reads a text file line by line and prints every non-overlapping match of a
 * POSIX extended regular expression to standard output.
 *
 * @param file_name Path of the file to scan; opened read-only.
 * @param pattern   POSIX extended regular expression; compiled with
 *                  REG_EXTENDED | REG_NEWLINE.
 * @return The number of matches printed, or -1 if the file cannot be opened
 *         or the pattern fails to compile.
 *
 * Lines are read in chunks of at most 1023 characters, so a match that
 * straddles a chunk boundary of a longer line is not detected.
 */
int print_file(const char *file_name, const char *pattern)
{
    /* Read-only mode is enough; "r+" would fail on files that are readable
       but not writable. */
    FILE *fp = fopen(file_name, "r");
    if (fp == NULL){
        return -1;
    }

    /* Compile the regular expression; regcomp() returns 0 on success. */
    regex_t reg;
    if (regcomp(&reg, pattern, REG_EXTENDED|REG_NEWLINE) != 0){
        fclose(fp);
        return -1;
    }

    regmatch_t pm[1];
    char buff[1024] = {0};
    int count = 0;

    while (fgets(buff, sizeof(buff), fp)){ /* read the file chunk by chunk */
        const char *p = buff;
        int eflags = 0;
        while (regexec(&reg, p, 1, pm, eflags) == 0){
            const size_t match_len = static_cast<size_t>(pm[0].rm_eo - pm[0].rm_so);
            count++;
            /* Build the text from an explicit length so that nothing left
               over from an earlier, longer match can be printed. */
            std::cout << "匹配:" << std::string(p + pm[0].rm_so, match_len) << std::endl;

            p += pm[0].rm_eo;
            if (match_len == 0){
                /* A zero-length match consumes nothing; step forward one
                   character so the scan always makes progress. */
                if (*p == '\0') break;
                ++p;
            }
            if (*p == '\0') break;

            /* '^' may only match at the resume point if it follows a newline. */
            eflags = (p[-1] == '\n') ? 0 : REG_NOTBOL;
        }
    }

    regfree(&reg);
    fclose(fp);
    return count;
}
/**
 * Demo driver: runs the first-match helper and the all-matches helper on a
 * sample string, then scans test.txt for runs of five digits, printing the
 * results of each step to standard output.
 */
int main()
{
    char text[] = "1231a4568b789c234";
    char three_digits[] = "[0-9]{3}";

    /* First match only. */
    char first[20] = {0};
    find_first(text, three_digits, first);
    std::cout << strlen(first) << ":" << first << std::endl;
    std::cout << "***************************" << std::endl;

    /* Every match in the sample string. */
    char matches[20][20] = {0};
    int total = find_all(text, three_digits, matches);
    int idx = 0;
    while (idx < total) {
        std::cout << "result:" << "i=" << idx + 1 << "----" << matches[idx] << std::endl;
        ++idx;
    }
    std::cout << "***************************" << std::endl;

    /* Matches found in the test file. */
    total = print_file("test.txt", "[0-9]{5}");
    std::cout << "匹配的个数:" << total << std::endl;
    return 0;
}
2、编译运行
g++ -o regcomp regcomp.cpp
./regcomp
当前目录下的测试文件test.txt的内容:
abc12345678
ddd55555555hhh
123456
三、总结
(1)正则表达式的语法可参考:http://deerchao.net/tutorials/regex/regex-1.htm ;在线正则表达式测试工具:http://tool.oschina.net/regex 。(2)若有建议,请留言,在此先感谢!