一、简介
标准C不支持正则表达式(标准C++直到C++11才提供<regex>),但POSIX提供的<regex.h>函数库可以辅助C/C++程序员完成这一功能。正则表达式常用函数:编译正则表达式 regcomp()、匹配正则表达式 regexec()、释放正则表达式 regfree(),另有 regerror() 可将错误码转换为可读的错误信息。
二、详解
1、代码
regcomp.cpp:
- #include <iostream>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string>
- #include <regex.h>
- #include <assert.h>
- #include <string.h>
- using namespace std;
/**
 * Finds the first substring of `input` that matches a POSIX extended
 * regular expression.
 *
 * @param input    Text to search (searched up to its first NUL byte).
 * @param pattern  Regular expression, compiled with REG_EXTENDED | REG_NEWLINE.
 * @param out      Receives the matched text; left empty when nothing matches
 *                 or an error occurs.
 * @return The offset just past the match within `input` on success,
 *         input.length() when nothing matches, -1 when `pattern` does not
 *         compile, or -2 when regexec() reports any other failure.
 */
int find_first(std::string input, std::string pattern, std::string &out)
{
    regex_t reg;
    regmatch_t pm[1];

    out.clear();

    if (regcomp(&reg, pattern.c_str(), REG_EXTENDED | REG_NEWLINE) != 0) {
        /* Nothing to free: regcomp() failed, so `reg` holds no resources. */
        return -1;
    }

    int result;
    const int status = regexec(&reg, input.c_str(), 1, pm, 0);
    if (status == 0) {
        out = input.substr(pm[0].rm_so, pm[0].rm_eo - pm[0].rm_so);
        result = static_cast<int>(pm[0].rm_eo);
    } else if (status == REG_NOMATCH) {
        result = static_cast<int>(input.length());
    } else {
        result = -2;
    }

    /* Single exit point so the compiled pattern is released on every path,
       including the -2 error path. */
    regfree(&reg);
    return result;
}
/**
 * Copies the first substring of `buff` that matches a POSIX extended regular
 * expression into `outdata` as a NUL-terminated string.
 *
 * @param buff     NUL-terminated text to search.
 * @param pattern  Regular expression, compiled with REG_EXTENDED | REG_NEWLINE.
 * @param outdata  Destination buffer. The caller must provide room for the
 *                 longest possible match plus the terminator; no size is
 *                 passed in, so this function cannot check it.
 * @return 0 when a match was copied; -1 when the pattern does not compile or
 *         nothing matches ("no match!" is printed and `outdata` is set to the
 *         empty string in the latter case); -2 on any other regexec() failure.
 */
int find_first(const char *buff, const char *pattern, char *outdata)
{
    regex_t reg;
    regmatch_t pm[1];

    if (regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE) != 0) {
        return -1;
    }

    int status = regexec(&reg, buff, 1, pm, 0);
    if (status == 0) {
        const size_t len = static_cast<size_t>(pm[0].rm_eo - pm[0].rm_so);
        memcpy(outdata, buff + pm[0].rm_so, len);
        /* Terminate at the end of the copied text. Indexing with the source
           offset (rm_eo) is only correct when the match starts at offset 0. */
        outdata[len] = '\0';
    } else if (status == REG_NOMATCH) {
        printf("no match!\n");
        outdata[0] = '\0';
        status = -1;
    } else {
        status = -2;
    }

    /* Release the compiled pattern on every path that compiled it. */
    regfree(&reg);
    return status;
}
/**
 * Collects every non-overlapping, non-empty match of a POSIX extended regular
 * expression in `buff`, scanning left to right.
 *
 * @param buff         NUL-terminated text to search.
 * @param pattern      Regular expression, compiled with
 *                     REG_EXTENDED | REG_NEWLINE.
 * @param result       Output rows of 20 bytes each. Every stored match is
 *                     NUL-terminated; a match longer than 19 characters is
 *                     truncated to fit its row.
 * @param max_results  Number of rows available in `result`. A negative value
 *                     (the default) means "no limit", in which case the
 *                     caller must guarantee enough rows for every match.
 * @return The number of matches stored, or -1 when the pattern does not
 *         compile.
 */
int find_all(const char *buff, const char *pattern, char result[][20],
             int max_results = -1)
{
    regex_t reg;
    regmatch_t pm[1];
    const size_t max_len = sizeof(result[0]) - 1;

    if (regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE) != 0) {
        return -1;
    }

    int count = 0;
    const char *p = buff;
    while (*p != '\0' && (max_results < 0 || count < max_results)) {
        /* When resuming in the middle of a line, `p` is not a real line
           start, so '^' must not be allowed to match there. */
        const int eflags = (p != buff && p[-1] != '\n') ? REG_NOTBOL : 0;
        if (regexec(&reg, p, 1, pm, eflags) != 0) {
            break;
        }

        size_t len = static_cast<size_t>(pm[0].rm_eo - pm[0].rm_so);
        if (len == 0) {
            /* An empty match makes no progress on its own; step over one
               character so patterns such as "[0-9]*" cannot loop forever. */
            p += pm[0].rm_eo;
            if (*p != '\0') {
                p++;
            }
            continue;
        }

        if (len > max_len) {
            len = max_len;
        }
        memcpy(result[count], p + pm[0].rm_so, len);
        result[count][len] = '\0';
        count++;

        /* Resume right after the full match, even if the copy was truncated. */
        p += pm[0].rm_eo;
    }

    regfree(&reg);
    return count;
}
/**
 * Reads a text file line by line and prints every non-empty match of a POSIX
 * extended regular expression to standard output, one match per line.
 *
 * Lines are read in chunks of at most 1023 characters; a match that straddles
 * a chunk boundary of a longer line is not detected.
 *
 * @param file_name  Path of the file to scan; opened read-only.
 * @param pattern    Regular expression, compiled with
 *                   REG_EXTENDED | REG_NEWLINE.
 * @return The number of matches printed, or -1 when the file cannot be opened
 *         or the pattern does not compile.
 */
int print_file(const char *file_name, const char *pattern)
{
    /* Read-only access is enough; "r+" would fail on files that are not
       writable. */
    FILE *fp = fopen(file_name, "r");
    if (fp == NULL) {
        return -1;
    }

    regex_t reg;
    if (regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE) != 0) {
        fclose(fp);
        return -1;
    }

    int count = 0;
    char buff[1024];
    regmatch_t pm[1];

    while (fgets(buff, sizeof(buff), fp)) {
        const char *p = buff;
        while (*p != '\0') {
            /* After the first match `p` is inside the line, so '^' must not
               match there. */
            const int eflags = (p != buff) ? REG_NOTBOL : 0;
            if (regexec(&reg, p, 1, pm, eflags) != 0) {
                break;
            }

            const size_t len = static_cast<size_t>(pm[0].rm_eo - pm[0].rm_so);
            p += pm[0].rm_eo;
            if (len == 0) {
                /* Empty match: step over one character to guarantee progress. */
                if (*p != '\0') {
                    p++;
                }
                continue;
            }

            count++;
            /* Build the text from exactly the matched span so nothing from an
               earlier, longer match can leak into the output. */
            std::cout << "匹配:" << std::string(p - len, len) << std::endl;
        }
    }

    regfree(&reg);
    fclose(fp);
    return count;
}
- int main()
- {
- char result[20][20] = {0};
- char buf[] = "1231a4568b789c234";
- char pattern[] = "[0-9]{3}";
- char resultfirst[20] = {0};
- find_first(buf, pattern, resultfirst);
- cout<<strlen(resultfirst) << ":" << resultfirst<<endl;
- cout << "***************************" <<endl;
- int count = find_all(buf, pattern, result);
- for (int i = 0; i < count; i++) {
- cout<<"result:"<<"i="<<i+1<<"----"<<result[i]<<endl;
- }
- cout << "***************************" <<endl;
- count = print_file("test.txt", "[0-9]{5}");
- cout<<"匹配的个数:"<<count<<endl;
- return 0;
- }
2、编译运行
- g++ -o regcomp regcomp.cpp
- ./regcomp
当前目录下的测试文件test.txt的内容:
- abc12345678
- ddd55555555hhh
- 123456
三、总结
(1)正则表达式的语法可参考 http://deerchao.net/tutorials/regex/regex-1.htm ,在线测试正则表达式可使用 http://tool.oschina.net/regex 。
(2)若有建议,请留言,在此先感谢