一、前言
正则表达式,很方便,那么该怎么学习呢?
知乎上有这样一个问题:“应该怎么练习使用正则表达式?”,很有意思。
其中有三个回答,我觉得很有用处。
正则入门:正则王国奇遇记
疯狂练习:在线练习正则表达式
深入正则:一个大佬的博客
下面是本人对C++中的正则表达式的理解。
二、match/search/replace
2.1 regex_match
regex_match可以理解为全匹配,在写正则时,需要与待匹配字符串格式保持一致。
#include <regex>
#include <string>
#include <iostream>
using namespace std;
// Demonstrates regex_match: the pattern has to cover the whole input string.
// Prints whether the input matched, the length of the full match, and every
// sub-match (index 0 is the full match, index 1 is the captured nine digits).
int main(){
    string s = "A124703402B";
    regex reg("\\w(\\d{9})\\w");
    smatch m;
    bool ret = regex_match(s, m, reg);
    cout << (ret ? "valid" : "invalid") << endl;
    cout << "length: " << m.length() << endl;
    // m.size() is unsigned, so the index uses size_t to avoid a
    // signed/unsigned comparison.
    for (size_t i = 0; i < m.size(); i++){
        cout << "m.str(" << i << "): "<< m.str(i) << endl;
    }
    // Discards one character from stdin; presumably used to keep the console open.
    cin.ignore();
    return 0;
}
2.2 regex_search
search是查找子串,只要字符串中包含与你写的正则规则相匹配的子串,就可以匹配到。
#include <regex>
#include <string>
#include <iostream>
using namespace std;
// Demonstrates regex_search: the pattern only needs to match a substring of
// the input. Prints whether a match was found, the length of the match, and
// every sub-match (only index 0 here, since the pattern has no capture group).
int main(){
    string s = "A124703402B";
    regex reg("\\d{9}");
    smatch m;
    bool ret = regex_search(s, m, reg);
    cout << (ret ? "valid" : "invalid") << endl;
    cout << "length: " << m.length() << endl;
    // m.size() is unsigned, so the index uses size_t to avoid a
    // signed/unsigned comparison.
    for (size_t i = 0; i < m.size(); i++){
        cout << "m.str(" << i << "): "<< m.str(i) << endl;
    }
    // Discards one character from stdin; presumably used to keep the console open.
    cin.ignore();
    return 0;
}
2.3 regex_replace
replace是替换:既可以正向替换,即把与正则规则匹配的子串替换成指定的内容;也可以反向提取,即通过分组引用(如$1)只保留匹配到的那一部分。
正向替换
#include <regex>
#include <string>
#include <iostream>
using namespace std;
// Demonstrates regex_replace used for removal: every occurrence of either
// filler punctuation mark in the pattern is replaced with an empty string.
// Prints the original sentence followed by the cleaned-up one.
int main(){
    const std::string original = "肚…子。。好饿…,…早知道…当…初…。。。多…刷…点。。。力…扣了…!";
    const std::regex filler("…|。");
    const std::string cleaned = std::regex_replace(original, filler, "");

    std::cout << "s: " << original << std::endl;
    std::cout << "res: " << cleaned << std::endl;

    // Discards one character from stdin before exiting.
    std::cin.ignore();
    return 0;
}
反向提取
#include <regex>
#include <string>
#include <iostream>
using namespace std;
// Demonstrates extraction through regex_replace: the whole "name:<word>"
// match is replaced by its first capture group ($1), leaving only the word.
// Prints the input string followed by the extracted value.
int main(){
    const std::string input = "name:lyrics";
    const std::regex keyValue("name:(\\w+)");
    const std::string extracted = std::regex_replace(input, keyValue, "$1");

    std::cout << "s: " << input << std::endl;
    std::cout << "res: " << extracted << std::endl;

    // Discards one character from stdin before exiting.
    std::cin.ignore();
    return 0;
}
三、基础
3.1开头与结尾
^限定开头的字符
#include <regex>
#include <string>
#include <iostream>
using namespace std;
// Demonstrates the ^ anchor: the pattern requires the string to start with
// 'A', followed by nine digits and one word character. Prints each test
// string with "valid" or "invalid".
int main(){
    string s = "B124703402B";   // starts with 'B', so it should be rejected
    string s2 = "A124703402B";  // starts with 'A', so it should be accepted
    regex reg("^A(\\d{9})\\w");
    // No match_results object is needed: only the boolean outcome is printed.
    bool ret = regex_match(s, reg);
    cout << s << ": " << (ret ? "valid" : "invalid") << endl;
    ret = regex_match(s2, reg);
    cout << s2 << ": " << (ret ? "valid" : "invalid") << endl;
    // Discards one character from stdin; presumably used to keep the console open.
    cin.ignore();
    return 0;
}
$限定结尾的字符
#include <regex>
#include <string>
#include <iostream>
using namespace std;
// Demonstrates the $ anchor: the pattern requires one word character, nine
// digits, and a final 'B' at the end of the string. Prints each test string
// with "valid" or "invalid".
int main(){
    string s = "B124703402C";   // ends with 'C', so it should be rejected
    string s2 = "A124703402B";  // ends with 'B', so it should be accepted
    regex reg("\\w(\\d{9})B$");
    // No match_results object is needed: only the boolean outcome is printed.
    bool ret = regex_match(s, reg);
    cout << s << ": " << (ret ? "valid" : "invalid") << endl;
    ret = regex_match(s2, reg);
    cout << s2 << ": " << (ret ? "valid" : "invalid") << endl;
    // Discards one character from stdin; presumably used to keep the console open.
    cin.ignore();
    return 0;
}
3.2 \d \s \w
\d 数字0到9
\D 反选
\s 空白字符(空格、制表符、换行符等)
\S 反选
\w 字符(英文、下划线、数字)
\W 反选
#include <regex>
#include <string>
#include <iostream>
using namespace std;
// Demonstrates the \w, \d and \s character classes: the pattern expects a
// word character, digits, one whitespace character, more digits, and a final
// word character. Prints each test string with "valid" or "invalid".
int main(){
    string s = "B124703402C";    // contains no whitespace, so it should be rejected
    string s2 = "A12470 3402E";  // has a space between the digit runs, so it should be accepted
    regex reg("\\w\\d+\\s\\d+\\w");
    // No match_results object is needed: only the boolean outcome is printed.
    bool ret = regex_match(s, reg);
    cout << s << ": " << (ret ? "valid" : "invalid") << endl;
    ret = regex_match(s2, reg);
    cout << s2 << ": " << (ret ? "valid" : "invalid") << endl;
    // Discards one character from stdin; presumably used to keep the console open.
    cin.ignore();
    return 0;
}
3.3 . * + ?
.任意单个字符
*前一个字符出现任意次(包括0次)
+前一个字符至少出现一次
?前一个字符至多出现一次
#include <regex>
#include <string>
#include <iostream>
using namespace std;
// Demonstrates '.' and '*' combined with a negative lookahead: the pattern
// accepts any string that does not begin with 'B'. Prints each test string
// with "valid" or "invalid".
int main(){
    string s = "B124703402C";   // starts with 'B', so it should be rejected
    string s2 = "A124703402E";  // does not start with 'B', so it should be accepted
    regex reg("^(?!B).*"); // any string that does not start with the character B
    // No match_results object is needed: only the boolean outcome is printed.
    bool ret = regex_match(s, reg);
    cout << s << ": " << (ret ? "valid" : "invalid") << endl;
    ret = regex_match(s2, reg);
    cout << s2 << ": " << (ret ? "valid" : "invalid") << endl;
    // Discards one character from stdin; presumably used to keep the console open.
    cin.ignore();
    return 0;
}
3.4 () {} []
()分组:(123),这样可以将匹配到的123取出来
{}次数:{4,9},这个表示前一个字符(或分组)重复出现4到9次
[]范围:[a-z],这个表示匹配所有的小写字母
#include <regex>
#include <string>
#include <iostream>
using namespace std;
// Demonstrates grouping (), repetition counts {} and character ranges []:
// the pattern captures a run of exactly five lowercase letters surrounded by
// arbitrary characters. Prints whether a match was found, the length of the
// full match, and every sub-match (index 1 is the captured letters).
int main(){
    string s = "B1247abscd03402C";
    regex reg(".*([a-z]{5}).*");
    smatch m;
    bool ret = regex_search(s,m, reg);
    cout << s << ": " << (ret ? "valid" : "invalid") << endl;
    cout << "length: " << m.length() << endl;
    // m.size() is unsigned, so the index uses size_t to avoid a
    // signed/unsigned comparison.
    for (size_t i = 0; i < m.size(); i++){
        cout << "m.str(" << i << "): " << m.str(i) << endl;
    }
    // Discards one character from stdin; presumably used to keep the console open.
    cin.ignore();
    return 0;
}
四、最后
又凌晨了,哎,日常熬夜,晚安!