regex

The regex match and search interface


//main.cpp
#include <regex>
#include <iostream>
using namespace std;

// Reports the outcome of a regex operation on standard output:
// writes "found" when b is true and "not found" otherwise,
// then ends the line and flushes the stream.
void out(bool b)
{
	if (b) {
		std::cout << "found" << std::endl;
	}
	else {
		std::cout << "not found" << std::endl;
	}
}

// Demonstrates regex_match() and regex_search() on a small XML/HTML-like
// input. Every attempt is reported through out() as "found"/"not found".
int main()
{
	// inputs shared by the individual checks
	const char* const tagged = "<tag>value</tag>";
	const char* const prefixed = "XML tag: <tag>value</tag>";

	// default grammar: opening and closing tag are unrelated to each other
	const std::regex anyTags("<.*>.*</.*>");
	out(std::regex_match(tagged, anyTags));

	// the back reference \1 requires the closing tag to repeat the opening tag
	const std::regex sameTags("<(.*)>.*</\\1>");
	out(std::regex_match(tagged, sameTags));

	// same pattern written for the grep grammar (groups are \( ... \))
	const std::regex grepTags("<\\(.*\\)>.*</\\1>", std::regex_constants::grep);
	out(std::regex_match(tagged, grepTags));

	// pattern given as C-string: the regex object has to be constructed explicitly
	out(std::regex_match(tagged, std::regex("<(.*)>.*</\\1>")));
	std::cout << std::endl;

	// regex_match() must cover the whole input, regex_search() only a part of it
	out(std::regex_match(prefixed, std::regex("<(.*)>.*</\\1>")));       // fails to match
	out(std::regex_match(prefixed, std::regex(".*<(.*)>.*</\\1>.*")));   // matches
	out(std::regex_search(prefixed, std::regex("<(.*)>.*</\\1>")));      // matches
	out(std::regex_search(prefixed, std::regex(".*<(.*)>.*</\\1>.*")));  // matches
}



dealing with subexpressions

#include <string>
#include <regex>
#include <iostream>
#include <iomanip>
using namespace std;

// Searches a sample string for an XML-like element and prints what the
// match_results object holds: the overall match, its prefix and suffix,
// and every sub-match (once by index, once by iteration).
int main()
{
	std::string data = "XML tag: <tag-name>the value</tag-name>.";
	std::cout << "data:             " << data << "\n\n";

	std::smatch m;  // receives the details of the match
	// group 1: opening tag name, group 2: value,
	// group 3: closing tag name (must repeat group 1 via \1)
	bool found = std::regex_search(data,
		m,
		std::regex("<(.*)>(.*)</(\\1)>"));

	// print match details:
	std::cout << "m.empty():        " << std::boolalpha << m.empty() << std::endl;
	std::cout << "m.size():         " << m.size() << std::endl;
	if (found) {
		std::cout << "m.str():          " << m.str() << std::endl;
		std::cout << "m.length():       " << m.length() << std::endl;
		std::cout << "m.position():     " << m.position() << std::endl;
		std::cout << "m.prefix().str(): " << m.prefix().str() << std::endl;
		std::cout << "m.suffix().str(): " << m.suffix().str() << std::endl;
		std::cout << std::endl;

		// iterating over all matches (using the match index);
		// the index uses the container's own unsigned size type so that the
		// comparison with m.size() does not mix signed and unsigned values
		for (std::smatch::size_type i = 0; i < m.size(); ++i) {
			std::cout << "m[" << i << "].str():       " << m[i].str() << std::endl;
			std::cout << "m.str(" << i << "):         " << m.str(i) << std::endl;
			std::cout << "m.position(" << i << "):    " << m.position(i)
				<< std::endl;
		}
		std::cout << std::endl;

		// iterating over all matches (element 0 is the whole match,
		// the following elements are the sub-matches):
		std::cout << "matches:" << std::endl;
		for (const auto& sub : m) {
			std::cout << " " << sub << " ";
			std::cout << "(length: " << sub.length() << ")" << std::endl;
		}
	}
}

data:             XML tag: <tag-name>the value</tag-name>.


m.empty():        false
m.size():         4
m.str():          <tag-name>the value</tag-name>
m.length():       30
m.position():     9
m.prefix().str(): XML tag:
m.suffix().str(): .


m[0].str():       <tag-name>the value</tag-name>
m.str(0):         <tag-name>the value</tag-name>
m.position(0):    9
m[1].str():       tag-name
m.str(1):         tag-name
m.position(1):    10
m[2].str():       the value
m.str(2):         the value
m.position(2):    19
m[3].str():       tag-name
m.str(3):         tag-name
m.position(3):    30


matches:
 <tag-name>the value</tag-name> (length: 30)
 tag-name (length: 8)
 the value (length: 9)
 tag-name (length: 8)
请按任意键继续. . .


find all matches of a regular expression

#include <string>
#include <regex>
#include <iostream>
using namespace std;

// Finds every "<tag>value</tag>" element in a small XML snippet by calling
// regex_search() repeatedly, each time continuing behind the previous match.
int main()
{
	const std::string data =
		"<person>\n"
		" <first>Nico</first>\n"
		" <last>Josuttis</last>\n"
		"</person>\n";

	// group 1: tag name, group 2: value, group 3: closing tag (same as group 1)
	const std::regex reg("<(.*)>(.*)</(\\1)>");

	std::smatch m;
	auto cursor = data.cbegin();   // start of the part not searched yet
	const auto last = data.cend();
	while (std::regex_search(cursor, last, m, reg)) {
		std::cout << "match:  " << m.str() << std::endl;
		std::cout << " tag:   " << m.str(1) << std::endl;
		std::cout << " value: " << m.str(2) << std::endl;

		// resume directly behind the match that was just reported
		cursor = m.suffix().first;
	}
}

match:  <first>Nico</first>
 tag:   first
 value: Nico
match:  <last>Josuttis</last>
 tag:   last
 value: Josuttis
请按任意键继续. . .


Regex iterators


#include <string>
#include <regex>
#include <iostream>
#include <algorithm>
using namespace std;

// Visits all matches of a regular expression with a regex_iterator:
// first in a hand-written loop, then by passing the iterator range to
// the for_each() algorithm.
int main()
{
	const std::string data =
		"<person>\n"
		" <first>Nico</first>\n"
		" <last>Josuttis</last>\n"
		"</person>\n";

	const std::regex reg("<(.*)>(.*)</(\\1)>");

	// a default-constructed regex_iterator marks the end of the sequence
	const std::sregex_iterator end;

	// explicit loop: each dereference yields the match_results of one match
	for (std::sregex_iterator it(data.cbegin(), data.cend(), reg); it != end; ++it) {
		std::cout << "match:  " << it->str() << std::endl;
		std::cout << " tag:   " << it->str(1) << std::endl;
		std::cout << " value: " << it->str(2) << std::endl;
	}

	// algorithm form: every match is handed to the callable as an smatch
	const auto printMatch = [](const std::smatch& m) {
		std::cout << "match:  " << m.str() << std::endl;
		std::cout << " tag:   " << m.str(1) << std::endl;
		std::cout << " value: " << m.str(2) << std::endl;
	};
	std::sregex_iterator first(data.cbegin(), data.cend(), reg);
	std::for_each(first, end, printMatch);
}

match:  <first>Nico</first>
 tag:   first
 value: Nico
match:  <last>Josuttis</last>
 tag:   last
 value: Josuttis
match:  <first>Nico</first>
 tag:   first
 value: Nico
match:  <last>Josuttis</last>
 tag:   last
 value: Josuttis
请按任意键继续. . .


regex Token iterators


#include <string>
#include <regex>
#include <iostream>
#include <algorithm>
using namespace std;

// Shows two uses of regex_token_iterator:
//  - selecting particular sub-matches of every match (indexes 0 and 2)
//  - splitting a string at separators (index -1)
int main()
{
	const std::string data =
		"<person>\n"
		" <first>Nico</first>\n"
		" <last>Josuttis</last>\n"
		"</person>\n";

	const std::regex reg("<(.*)>(.*)</(\\1)>");
	const std::sregex_token_iterator noMoreTokens;  // default-constructed: end marker

	// for each match yield the full match (0) followed by the second group (2)
	std::sregex_token_iterator token(data.cbegin(), data.cend(), reg, { 0,2 });
	while (token != noMoreTokens) {
		std::cout << "match:  " << token->str() << std::endl;
		++token;
	}
	std::cout << std::endl;

	const std::string names = "nico, jim, helmut, paul, tim, john paul, rita";
	// separator: one of , ; . with optional whitespace on both sides
	const std::regex sep("[ \t\n]*[,;.][ \t\n]*");

	// index -1 yields the text between the separators instead of the matches
	std::sregex_token_iterator name(names.cbegin(), names.cend(), sep, -1);
	while (name != noMoreTokens) {
		std::cout << "name:  " << *name << std::endl;
		++name;
	}
}

match:  <first>Nico</first>
match:  Nico
match:  <last>Josuttis</last>
match:  Josuttis


name:  nico
name:  jim
name:  helmut
name:  paul
name:  tim
name:  john paul
name:  rita
请按任意键继续. . .


replacing regular expressions

#include <string>
#include <regex>
#include <iostream>
#include <iterator>
using namespace std;
// Rewrites "<tag>value</tag>" elements as "<tag value="value"/>" with
// regex_replace(), using the default and the sed replacement syntax and
// finally the iterator interface with format flags.
int main()
{
	const std::string data =
		"<person>\n"
		" <first>Nico</first>\n"
		" <last>Josuttis</last>\n"
		"</person>\n";
	const std::regex reg("<(.*)>(.*)</(\\1)>");

	// default replacement syntax: $1 and $2 refer to the capture groups
	const std::string viaDefault = std::regex_replace(data, reg, "<$1 value=\"$2\"/>");
	std::cout << viaDefault << std::endl;

	// sed replacement syntax: \1 and \2 refer to the capture groups
	const std::string viaSed = std::regex_replace(data, reg,
		"<\\1 value=\"\\2\"/>",
		std::regex_constants::format_sed);
	std::cout << viaSed << std::endl;

	// iterator interface, writing into res2 with
	// - format_no_copy: don't copy characters that don't match
	// - format_first_only: replace only the first match found
	const auto flags = std::regex_constants::format_no_copy
		| std::regex_constants::format_first_only;
	std::string res2;
	std::regex_replace(std::back_inserter(res2),  // destination
		data.begin(), data.end(),                 // source range
		reg,                                      // regular expression
		"<$1 value=\"$2\"/>",                     // replacement
		flags);                                   // format flags
	std::cout << res2 << std::endl;
}

<person>
 <first value="Nico"/>
 <last value="Josuttis"/>
</person>


<person>
 <first value="Nico"/>
 <last value="Josuttis"/>
</person>


<first value="Nico"/>
请按任意键继续. . .


Regex Flags

#include <string>
#include <regex>
#include <iostream>
using namespace std;

// Reads all of standard input and prints every LaTeX index entry found in
// it. Matching ignores case and uses the egrep grammar.
int main()
{
	// two alternative forms of an index entry
	const std::string pat1 = R"(\\.*index\{([^}]*)\})";       // first capture group
	const std::string pat2 = R"(\\.*index\{(.*)\}\{(.*)\})";  // 2nd and 3rd capture group

	// With the egrep grammar the two expressions can be joined by a newline
	// character to form a single pattern.
	const auto grammar = std::regex_constants::egrep | std::regex_constants::icase;
	const std::regex pat(pat1 + "\n" + pat2, grammar);

	// slurp standard input into a string
	std::istreambuf_iterator<char> inputBegin(std::cin);
	std::istreambuf_iterator<char> inputEnd;
	const std::string data(inputBegin, inputEnd);

	// search and print matching index entries, continuing behind each match
	std::smatch m;
	auto cursor = data.cbegin();
	const auto last = data.cend();
	while (std::regex_search(cursor, last, m, pat)) {
		std::cout << "match: " << m.str() << std::endl;
		// the value is printed as group 1 followed by group 2
		std::cout << "  val: " << m.str(1) + m.str(2) << std::endl;
		std::cout << "  see: " << m.str(3) << std::endl;
		cursor = m.suffix().first;
	}
}


\chapter{The Standard Template Library}
\index{STL}%
\MAININDEX{standard template library}%
\SEEINDEX{standard template library}{STL}%
This is the basic chapter about the STL.
\section{STL Components}
\hauptindex{STL, introduction}%
The \stl{} is based on the cooperation of
^Z
match: \index{STL}
  val: STL
  see:
match: \MAININDEX{standard template library}
  val: standard template library
  see:
match: \SEEINDEX{standard template library}{STL}
  val: standard template library
  see: STL
match: \hauptindex{STL, introduction}
  val: STL, introduction
  see:
请按任意键继续. . .


Regex exceptions

//regexexception.hpp
#include <regex>
#include <string>

// Translates a regex error code (as returned by std::regex_error::code())
// into a readable description of the form "<name>: <explanation>".
// Codes that are not one of the listed std::regex_constants::error_* values
// yield "unknown/non-standard regex error code".
template <typename T>
std::string regexCode(T code)
{
	namespace rc = std::regex_constants;

	// fallback text, kept if no case below applies
	const char* description = "unknown/non-standard regex error code";

	switch (code) {
	case rc::error_collate:
		description = "error_collate: regex has invalid collating element name";
		break;
	case rc::error_ctype:
		description = "error_ctype: regex has invalid character class name";
		break;
	case rc::error_escape:
		description = "error_escape: regex has invalid escaped char. or trailing escape";
		break;
	case rc::error_backref:
		description = "error_backref: regex has invalid back reference";
		break;
	case rc::error_brack:
		description = "error_brack: regex has mismatched '[' and ']'";
		break;
	case rc::error_paren:
		description = "error_paren: regex has mismatched '(' and ')'";
		break;
	case rc::error_brace:
		description = "error_brace: regex has mismatched '{' and '}'";
		break;
	case rc::error_badbrace:
		description = "error_badbrace: regex has invalid range in {} expression";
		break;
	case rc::error_range:
		description = "error_range: regex has invalid character range, such as '[b-a]'";
		break;
	case rc::error_space:
		description = "error_space: insufficient memory to convert regex into finite state";
		break;
	case rc::error_badrepeat:
		description = "error_badrepeat: one of *?+{ not preceded by valid regex";
		break;
	case rc::error_complexity:
		description = "error_complexity: complexity of match against regex over pre-set level";
		break;
	case rc::error_stack:
		description = "error_stack: insufficient memory to determine regex match";
		break;
	}
	return description;
}

//Source.cpp
#include <regex>
#include <iostream>
#include "regexexception.hpp"
using namespace std;

// Constructs a regex that is meant to be rejected and reports the resulting
// regex_error on standard error, using regexCode() to describe the code.
int main()
{
	try {
		// this pattern, compiled with the grep grammar, is intended to be invalid
		const auto grammar = std::regex_constants::grep | std::regex_constants::icase;
		std::regex pat("\\\\.*index\\{([^}]*)\\}", grammar);
		//...
	}
	catch (const std::regex_error& e) {
		// show both the library's own message and the decoded error code
		std::cerr << "regex_error: \n";
		std::cerr << " what(): " << e.what() << "\n";
		std::cerr << " code(): " << regexCode(e.code()) << std::endl;
	}
}

regex_error:
 what(): regex_error(error_badbrace): The expression contained an invalid range
in a { expression }.
 code(): error_badbrace: regex has invalid range in {} expression
请按任意键继续. . .




评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值