头文件
boost/algorithm/string/regex.hpp
作用
string的regex有如下API
find_regex:查找字符串str中第一个满足regex的子串,返回标记该子串起始和终止位置的iterator_range。
replace_regex:查找字符串str中第一次满足 regex的子串,并用新子串 替换。
replace_all_regex:查找字符串str中所有满足 regex的子串,并用新子串 替换。
erase_regex:查找字符串str中第一次满足 regex的子串,删除
erase_all_regex:查找字符串str中所有满足 regex的子串,删除
find_all_regex:查找字符串str中满足 regex的子串,返回子串 数组
split_regex:以字符串str中所有满足regex的子串作为分隔符,将str分割成子串数组。
join_if_regex(在支持函数模板偏序的编译器上名为join_if):将子串数组中完整匹配regex的子串用分隔符连接,返回新的字符串。
举例
#include <boost/algorithm/string/regex.hpp>
#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/sequence_traits.hpp>
// equals predicate is used for result comparison
#include <boost/algorithm/string/predicate.hpp>
// Include unit test framework
#include <boost/test/unit_test.hpp>
#include <string>
#include <vector>
#include <iostream>
#include <boost/regex.hpp>
#include <boost/test/test_tools.hpp>
using namespace std;
using namespace boost;
// Exercises find_regex, find_all_regex and split_regex on a fixed input in
// which the pattern "a[0-9]+c" occurs three times ("a1c", "a23c", "a456c").
// The first occurrence spans offsets [3, 6) of the input.
static void find_test()
{
    string str1("123a1cxxxa23cXXXa456c321");
    const char* pch1="123a1cxxxa23cXXXa456c321";
    regex rx("a[0-9]+c");
    // Same characters as str1, stored as ints, so the search is also run over
    // a container whose value type is not char.
    vector<int> vec1( str1.begin(), str1.end() );
    vector<string> tokens;

    // find results
    iterator_range<string::iterator> nc_result;
    iterator_range<string::const_iterator> cv_result;
    iterator_range<vector<int>::iterator> nc_vresult;
    iterator_range<vector<int>::const_iterator> cv_vresult;
    iterator_range<const char*> ch_result;

    // basic tests
    nc_result=find_regex( str1, rx );
    BOOST_CHECK(
        ( (nc_result.begin()-str1.begin()) == 3) &&
        ( (nc_result.end()-str1.begin()) == 6) );

    cv_result=find_regex( str1, rx );
    BOOST_CHECK(
        ( (cv_result.begin()-str1.begin()) == 3) &&
        ( (cv_result.end()-str1.begin()) == 6) );

    ch_result=find_regex( pch1, rx );
    BOOST_CHECK(( (ch_result.begin() - pch1 ) == 3) && ( (ch_result.end() - pch1 ) == 6 ) );

    // multi-type comparison test
    // The offsets are measured on the vector results themselves; checking the
    // earlier string results here would leave the vector searches unverified.
    nc_vresult=find_regex( vec1, rx );
    BOOST_CHECK(
        ( (nc_vresult.begin()-vec1.begin()) == 3) &&
        ( (nc_vresult.end()-vec1.begin()) == 6) );

    cv_vresult=find_regex( vec1, rx );
    BOOST_CHECK(
        ( (cv_vresult.begin()-vec1.begin()) == 3) &&
        ( (cv_vresult.end()-vec1.begin()) == 6) );

    // find_all_regex test: every match becomes one token
    find_all_regex( tokens, str1, rx );
    BOOST_REQUIRE( tokens.size()==3 );
    BOOST_CHECK( tokens[0]==string("a1c") );
    BOOST_CHECK( tokens[1]==string("a23c") );
    BOOST_CHECK( tokens[2]==string("a456c") );

    // split_regex test: matches act as separators, tokens is refilled
    split_regex( tokens, str1, rx );
    BOOST_REQUIRE( tokens.size()==4 );
    BOOST_CHECK( tokens[0]==string("123") );
    BOOST_CHECK( tokens[1]==string("xxx") );
    BOOST_CHECK( tokens[2]==string("XXX") );
    BOOST_CHECK( tokens[3]==string("321") );
}
// Checks the regex-filtered join: only tokens matched by the regex are kept,
// and the kept tokens are glued together with the "-" separator.
static void join_test()
{
    // Two tokens made of 'x' characters around one made of letters a/b/c.
    const char* const raw_tokens[] = { "xx", "abc", "xx" };
    vector<string> tokens1(
        raw_tokens,
        raw_tokens + sizeof(raw_tokens)/sizeof(raw_tokens[0]) );

    // The function under test is named join_if or join_if_regex depending on
    // whether the compiler supports function template ordering.
#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
    BOOST_CHECK( equals(join_if(tokens1, "-", regex("x+")), "xx-xx") );
    BOOST_CHECK( equals(join_if(tokens1, "-", regex("[abc]+")), "abc") );
#else
    BOOST_CHECK( equals(join_if_regex(tokens1, "-", regex("x+")), "xx-xx") );
    BOOST_CHECK( equals(join_if_regex(tokens1, "-", regex("[abc]+")), "abc") );
#endif
}
// Exercises the replace/erase regex algorithms in their three flavours:
// copy returned by value, copy written through an output iterator, and
// in-place modification of the input string.
static void replace_test()
{
    string str1("123a1cxxxa23cXXXa456c321");
    regex rx1("a([0-9]+)c");
    regex rx2("([xX]+)");
    regex rx3("_[^_]*_");
    // "$1" in the format refers to the first capture group of the regex.
    string fmt1("_A$1C_");
    string fmt2("_xXx_");

    // immutable tests: str1 must stay unchanged throughout this part

    // basic tests
    BOOST_CHECK( replace_regex_copy( str1, rx1, fmt1 )==string("123_A1C_xxxa23cXXXa456c321") );
    BOOST_CHECK( replace_all_regex_copy( str1, rx1, fmt1 )==string("123_A1C_xxx_A23C_XXX_A456C_321") );
    BOOST_CHECK( erase_regex_copy( str1, rx1 )==string("123xxxa23cXXXa456c321") );
    BOOST_CHECK( erase_all_regex_copy( str1, rx1 )==string("123xxxXXX321") );

    // output iterator variants test
    string strout;
    replace_regex_copy( back_inserter(strout), str1, rx1, fmt1 );
    BOOST_CHECK( strout==string("123_A1C_xxxa23cXXXa456c321") );
    strout.clear();
    replace_all_regex_copy( back_inserter(strout), str1, rx1, fmt1 );
    BOOST_CHECK( strout==string("123_A1C_xxx_A23C_XXX_A456C_321") );
    strout.clear();
    erase_regex_copy( back_inserter(strout), str1, rx1 );
    BOOST_CHECK( strout==string("123xxxa23cXXXa456c321") );
    strout.clear();
    erase_all_regex_copy( back_inserter(strout), str1, rx1 );
    BOOST_CHECK( strout==string("123xxxXXX321") );
    strout.clear();

    // in-place test: each step operates on the result of the previous one
    replace_regex( str1, rx1, fmt2 );
    BOOST_CHECK( str1==string("123_xXx_xxxa23cXXXa456c321") );

    replace_all_regex( str1, rx2, fmt1 );
    BOOST_CHECK( str1==string("123__AxXxC___AxxxC_a23c_AXXXC_a456c321") );
    erase_regex( str1, rx3 );
    BOOST_CHECK( str1==string("123AxXxC___AxxxC_a23c_AXXXC_a456c321") );
    erase_all_regex( str1, rx3 );
    BOOST_CHECK( str1==string("123AxXxCa23ca456c321") );
}
// Program entry point: runs the find, join and replace test groups in that
// order and always reports success through the exit code.
// The command-line arguments are not used, so both parameters are unnamed.
// NOTE(review): the test functions use BOOST_CHECK/BOOST_REQUIRE, which are
// normally invoked from inside a Boost.Test test case - confirm they behave
// as intended when called from a hand-written main.
int main( int /*argc*/, char* [] )
{
    find_test();
    join_test();
    replace_test();
    return 0;
}
源代码
namespace boost {
namespace algorithm {
// find_regex -----------------------------------------------//
//! Regex search algorithm
/*!
    Locate a substring of the input that matches the given regular
    expression. The input is first wrapped with \c as_literal, and the
    search itself is carried out by a \c regex_finder built from
    \c Rx and \c Flags.

    \param Input A container which will be searched.
    \param Rx A regular expression
    \param Flags Regex options
    \return
        An \c iterator_range delimiting the match.
        Its iterator type is \c range_iterator<RangeT>::type, i.e. either
        \c RangeT::iterator or \c RangeT::const_iterator, depending on
        the constness of the input parameter.
    \note This function provides the strong exception-safety guarantee
*/
template<
    typename RangeT,
    typename CharT,
    typename RegexTraitsT>
inline iterator_range<
    BOOST_STRING_TYPENAME range_iterator<RangeT>::type >
find_regex(
    RangeT& Input,
    const basic_regex<CharT, RegexTraitsT>& Rx,
    match_flag_type Flags=match_default )
{
    typedef BOOST_STRING_TYPENAME range_iterator<RangeT>::type input_iterator_type;

    // View the input as a plain iterator range before searching.
    iterator_range<input_iterator_type> literal_range(::boost::as_literal(Input));

    return ::boost::algorithm::regex_finder(Rx,Flags)(
        literal_range.begin(), literal_range.end() );
}
// replace_regex --------------------------------------------------------------------//
//! Replace regex algorithm (output-iterator variant)
/*!
Search the input for a substring matching the given regex and replace it
with text produced from the specified format. Only the first match is
replaced; replace_all_regex_copy handles every match.
The input is taken by const reference and is not modified. The result is
a modified copy of the input: this variant copies it to the output
iterator, while the overload without an output iterator returns it as a
sequence.
The work is forwarded to find_format_copy with a regex_finder built from
(Rx, Flags) and a regex_formatter built from (Format, Flags).
\param Output An output iterator to which the result will be copied
\param Input An input string
\param Rx A regular expression
\param Format Regex format definition
\param Flags Regex options, passed to both the finder and the formatter
(default: match_default | format_default)
\return An output iterator pointing just after the last inserted character or
a modified copy of the input
\note The second variant of this function provides the strong exception-safety guarantee
*/
template<
typename OutputIteratorT,
typename RangeT,
typename CharT,
typename RegexTraitsT,
typename FormatStringTraitsT, typename FormatStringAllocatorT >
inline OutputIteratorT replace_regex_copy(
OutputIteratorT Output,
const RangeT& Input,
const basic_regex<CharT, RegexTraitsT>& Rx,
const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
match_flag_type Flags=match_default | format_default )
{
// The same Flags value configures both matching and formatting.
return ::boost::algorithm::find_format_copy(
Output,
Input,
::boost::algorithm::regex_finder( Rx, Flags ),
::boost::algorithm::regex_formatter( Format, Flags ) );
}
//! Replace regex algorithm (sequence-returning variant)
/*!
\overload
Takes no output iterator. The input is passed by const reference and the
result of find_format_copy, called with a regex_finder for (Rx, Flags) and
a regex_formatter for (Format, Flags), is returned by value as a
\c SequenceT.
\param Input An input string
\param Rx A regular expression
\param Format Regex format definition
\param Flags Regex options, passed to both the finder and the formatter
(default: match_default | format_default)
\return A modified copy of the input
*/
template<
typename SequenceT,
typename CharT,
typename RegexTraitsT,
typename FormatStringTraitsT, typename FormatStringAllocatorT >
inline SequenceT replace_regex_copy(
const SequenceT& Input,
const basic_regex<CharT, RegexTraitsT>& Rx,
const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
match_flag_type Flags=match_default | format_default )
{
return ::boost::algorithm::find_format_copy(
Input,
::boost::algorithm::regex_finder( Rx, Flags ),
::boost::algorithm::regex_formatter( Format, Flags ) );
}
//! Replace regex algorithm (in-place variant)
/*!
Search for a substring matching given regex and format it with
the specified format. The input string is modified in-place and nothing
is returned. Only the first match is replaced; replace_all_regex
handles every match.
The work is forwarded to find_format with a regex_finder built from
(Rx, Flags) and a regex_formatter built from (Format, Flags).
\param Input An input string; taken by non-const reference and modified
\param Rx A regular expression
\param Format Regex format definition
\param Flags Regex options, passed to both the finder and the formatter
(default: match_default | format_default)
*/
template<
typename SequenceT,
typename CharT,
typename RegexTraitsT,
typename FormatStringTraitsT, typename FormatStringAllocatorT >
inline void replace_regex(
SequenceT& Input,
const basic_regex<CharT, RegexTraitsT>& Rx,
const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
match_flag_type Flags=match_default | format_default )
{
::boost::algorithm::find_format(
Input,
::boost::algorithm::regex_finder( Rx, Flags ),
::boost::algorithm::regex_formatter( Format, Flags ) );
}
// replace_all_regex --------------------------------------------------------------------//
//! Replace all regex algorithm (output-iterator variant)
/*!
Format all substrings, matching given regex, with the specified format.
The input is taken by const reference and is not modified. The result is
a modified copy of the input: this variant copies it to the output
iterator, while the overload without an output iterator returns it as a
sequence.
The work is forwarded to find_format_all_copy with a regex_finder built
from (Rx, Flags) and a regex_formatter built from (Format, Flags).
\param Output An output iterator to which the result will be copied
\param Input An input string
\param Rx A regular expression
\param Format Regex format definition
\param Flags Regex options, passed to both the finder and the formatter
(default: match_default | format_default)
\return An output iterator pointing just after the last inserted character or
a modified copy of the input
\note The second variant of this function provides the strong exception-safety guarantee
*/
template<
typename OutputIteratorT,
typename RangeT,
typename CharT,
typename RegexTraitsT,
typename FormatStringTraitsT, typename FormatStringAllocatorT >
inline OutputIteratorT replace_all_regex_copy(
OutputIteratorT Output,
const RangeT& Input,
const basic_regex<CharT, RegexTraitsT>& Rx,
const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
match_flag_type Flags=match_default | format_default )
{
return ::boost::algorithm::find_format_all_copy(
Output,
Input,
::boost::algorithm::regex_finder( Rx, Flags ),
::boost::algorithm::regex_formatter( Format, Flags ) );
}
//! Replace all regex algorithm (sequence-returning variant)
/*!
\overload
Takes no output iterator. The input is passed by const reference and the
result of find_format_all_copy, called with a regex_finder for (Rx, Flags)
and a regex_formatter for (Format, Flags), is returned by value as a
\c SequenceT.
\param Input An input string
\param Rx A regular expression
\param Format Regex format definition
\param Flags Regex options, passed to both the finder and the formatter
(default: match_default | format_default)
\return A modified copy of the input
*/
template<
typename SequenceT,
typename CharT,
typename RegexTraitsT,
typename FormatStringTraitsT, typename FormatStringAllocatorT >
inline SequenceT replace_all_regex_copy(
const SequenceT& Input,
const basic_regex<CharT, RegexTraitsT>& Rx,
const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
match_flag_type Flags=match_default | format_default )
{
return ::boost::algorithm::find_format_all_copy(
Input,
::boost::algorithm::regex_finder( Rx, Flags ),
::boost::algorithm::regex_formatter( Format, Flags ) );
}
//! Replace all regex algorithm (in-place variant)
/*!
Format all substrings, matching given regex, with the specified format.
The input string is modified in-place and nothing is returned.
The work is forwarded to find_format_all with a regex_finder built from
(Rx, Flags) and a regex_formatter built from (Format, Flags).
\param Input An input string; taken by non-const reference and modified
\param Rx A regular expression
\param Format Regex format definition
\param Flags Regex options, passed to both the finder and the formatter
(default: match_default | format_default)
*/
template<
typename SequenceT,
typename CharT,
typename RegexTraitsT,
typename FormatStringTraitsT, typename FormatStringAllocatorT >
inline void replace_all_regex(
SequenceT& Input,
const basic_regex<CharT, RegexTraitsT>& Rx,
const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
match_flag_type Flags=match_default | format_default )
{
::boost::algorithm::find_format_all(
Input,
::boost::algorithm::regex_finder( Rx, Flags ),
::boost::algorithm::regex_formatter( Format, Flags ) );
}
// erase_regex --------------------------------------------------------------------//
//! Erase regex algorithm (output-iterator variant)
/*!
Remove a substring matching given regex from the input. Only the first
match is removed; erase_all_regex_copy handles every match.
The input is taken by const reference and is not modified. The result is
a modified copy of the input: this variant copies it to the output
iterator, while the overload without an output iterator returns it as a
sequence.
Erasing is expressed as a replacement: the work is forwarded to
find_format_copy with a regex_finder built from (Rx, Flags) and the
formatter returned by empty_formatter(Input).
\param Output An output iterator to which the result will be copied
\param Input An input string
\param Rx A regular expression
\param Flags Regex options (default: match_default)
\return An output iterator pointing just after the last inserted character or
a modified copy of the input
\note The second variant of this function provides the strong exception-safety guarantee
*/
template<
typename OutputIteratorT,
typename RangeT,
typename CharT,
typename RegexTraitsT >
inline OutputIteratorT erase_regex_copy(
OutputIteratorT Output,
const RangeT& Input,
const basic_regex<CharT, RegexTraitsT>& Rx,
match_flag_type Flags=match_default )
{
return ::boost::algorithm::find_format_copy(
Output,
Input,
::boost::algorithm::regex_finder( Rx, Flags ),
::boost::algorithm::empty_formatter( Input ) );
}
//! Erase regex algorithm (sequence-returning variant)
/*!
\overload
Takes no output iterator. The input is passed by const reference and the
result of find_format_copy, called with a regex_finder for (Rx, Flags) and
the formatter returned by empty_formatter(Input), is returned by value as
a \c SequenceT.
\param Input An input string
\param Rx A regular expression
\param Flags Regex options (default: match_default)
\return A modified copy of the input
*/
template<
typename SequenceT,
typename CharT,
typename RegexTraitsT >
inline SequenceT erase_regex_copy(
const SequenceT& Input,
const basic_regex<CharT, RegexTraitsT>& Rx,
match_flag_type Flags=match_default )
{
return ::boost::algorithm::find_format_copy(
Input,
::boost::algorithm::regex_finder( Rx, Flags ),
::boost::algorithm::empty_formatter( Input ) );
}
//! Erase regex algorithm (in-place variant)
/*!
Remove a substring matching given regex from the input.
The input string is modified in-place and nothing is returned. Only the
first match is removed; erase_all_regex handles every match.
The work is forwarded to find_format with a regex_finder built from
(Rx, Flags) and the formatter returned by empty_formatter(Input).
\param Input An input string; taken by non-const reference and modified
\param Rx A regular expression
\param Flags Regex options (default: match_default)
*/
template<
typename SequenceT,
typename CharT,
typename RegexTraitsT >
inline void erase_regex(
SequenceT& Input,
const basic_regex<CharT, RegexTraitsT>& Rx,
match_flag_type Flags=match_default )
{
::boost::algorithm::find_format(
Input,
::boost::algorithm::regex_finder( Rx, Flags ),
::boost::algorithm::empty_formatter( Input ) );
}
// erase_all_regex --------------------------------------------------------------------//
//! Erase all regex algorithm (output-iterator variant)
/*!
Erase all substrings, matching given regex, from the input.
The input is taken by const reference and is not modified. The result is
a modified copy of the input: this variant copies it to the output
iterator, while the overload without an output iterator returns it as a
sequence.
The work is forwarded to find_format_all_copy with a regex_finder built
from (Rx, Flags) and the formatter returned by empty_formatter(Input).
\param Output An output iterator to which the result will be copied
\param Input An input string
\param Rx A regular expression
\param Flags Regex options (default: match_default)
\return An output iterator pointing just after the last inserted character or
a modified copy of the input
\note The second variant of this function provides the strong exception-safety guarantee
*/
template<
typename OutputIteratorT,
typename RangeT,
typename CharT,
typename RegexTraitsT >
inline OutputIteratorT erase_all_regex_copy(
OutputIteratorT Output,
const RangeT& Input,
const basic_regex<CharT, RegexTraitsT>& Rx,
match_flag_type Flags=match_default )
{
return ::boost::algorithm::find_format_all_copy(
Output,
Input,
::boost::algorithm::regex_finder( Rx, Flags ),
::boost::algorithm::empty_formatter( Input ) );
}
//! Erase all regex algorithm (sequence-returning variant)
/*!
\overload
Takes no output iterator. The input is passed by const reference and the
result of find_format_all_copy, called with a regex_finder for (Rx, Flags)
and the formatter returned by empty_formatter(Input), is returned by value
as a \c SequenceT.
\param Input An input string
\param Rx A regular expression
\param Flags Regex options (default: match_default)
\return A modified copy of the input
*/
template<
typename SequenceT,
typename CharT,
typename RegexTraitsT >
inline SequenceT erase_all_regex_copy(
const SequenceT& Input,
const basic_regex<CharT, RegexTraitsT>& Rx,
match_flag_type Flags=match_default )
{
return ::boost::algorithm::find_format_all_copy(
Input,
::boost::algorithm::regex_finder( Rx, Flags ),
::boost::algorithm::empty_formatter( Input ) );
}
//! Erase all regex algorithm (in-place variant)
/*!
Erase all substrings, matching given regex, from the input.
The input string is modified in-place and nothing is returned.
The work is forwarded to find_format_all with a regex_finder built from
(Rx, Flags) and the formatter returned by empty_formatter(Input).
\param Input An input string; taken by non-const reference and modified
\param Rx A regular expression
\param Flags Regex options (default: match_default)
*/
template<
typename SequenceT,
typename CharT,
typename RegexTraitsT>
inline void erase_all_regex(
SequenceT& Input,
const basic_regex<CharT, RegexTraitsT>& Rx,
match_flag_type Flags=match_default )
{
::boost::algorithm::find_format_all(
Input,
::boost::algorithm::regex_finder( Rx, Flags ),
::boost::algorithm::empty_formatter( Input ) );
}
// find_all_regex ------------------------------------------------------------------//
//! Find all regex algorithm
/*!
This algorithm finds all substrings matching the given regex
in the input.
Each match is copied and added as a new element to the output container.
Thus the result container must be able to hold copies
of the matches (in a compatible structure like std::string) or
a reference to it (e.g. using the iterator range class).
Examples of such a container are \c std::vector<std::string>
or \c std::list<boost::iterator_range<std::string::iterator>>
The work is forwarded to iter_find with a regex_finder built from
(Rx, Flags); the reference returned by iter_find is returned unchanged.
\param Result A container that can hold copies of, or references to, the substrings.
\param Input A container which will be searched.
\param Rx A regular expression
\param Flags Regex options (default: match_default)
\return A reference to the result
\note Prior content of the result will be overwritten.
\note This function provides the strong exception-safety guarantee
*/
template<
typename SequenceSequenceT,
typename RangeT,
typename CharT,
typename RegexTraitsT >
inline SequenceSequenceT& find_all_regex(
SequenceSequenceT& Result,
const RangeT& Input,
const basic_regex<CharT, RegexTraitsT>& Rx,
match_flag_type Flags=match_default )
{
return ::boost::algorithm::iter_find(
Result,
Input,
::boost::algorithm::regex_finder(Rx,Flags) );
}
// split_regex ------------------------------------------------------------------//
//! Split regex algorithm
/*!
Tokenize expression. This function is conceptually similar to C strtok:
the input sequence is split into tokens, separated by separators. Every
match of the given regex is treated as a separator.
Each token is copied and added as a new element to the output container.
Thus the result container must be able to hold copies
of the tokens (in a compatible structure like std::string) or
a reference to it (e.g. using the iterator range class).
Examples of such a container are \c std::vector<std::string>
or \c std::list<boost::iterator_range<std::string::iterator>>
The work is forwarded to iter_split with a regex_finder built from
(Rx, Flags); the reference returned by iter_split is returned unchanged.
\param Result A container that can hold copies of, or references to, the substrings.
\param Input A container which will be searched.
\param Rx A regular expression
\param Flags Regex options (default: match_default)
\return A reference to the result
\note Prior content of the result will be overwritten.
\note This function provides the strong exception-safety guarantee
*/
template<
typename SequenceSequenceT,
typename RangeT,
typename CharT,
typename RegexTraitsT >
inline SequenceSequenceT& split_regex(
SequenceSequenceT& Result,
const RangeT& Input,
const basic_regex<CharT, RegexTraitsT>& Rx,
match_flag_type Flags=match_default )
{
return ::boost::algorithm::iter_split(
Result,
Input,
::boost::algorithm::regex_finder(Rx,Flags) );
}
// join_if ------------------------------------------------------------------//
#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
//! Conditional join algorithm
/*!
    Concatenate the strings of a 'list' into one long string, keeping only
    the segments that the regular expression matches in full
    (\c regex_match is applied to each whole segment). The separator is
    inserted between consecutive kept segments, never before the first
    or after the last one.
    This is a specialization of join_if algorithm.

    \param Input A container that holds the input strings. It must be a container-of-containers.
    \param Separator A string that will separate the joined segments.
    \param Rx A regular expression
    \param Flags Regex options
    \return Concatenated string; empty when no segment matches.
    \note This function provides the strong exception-safety guarantee
*/
template<
    typename SequenceSequenceT,
    typename Range1T,
    typename CharT,
    typename RegexTraitsT >
inline typename range_value<SequenceSequenceT>::type
join_if(
    const SequenceSequenceT& Input,
    const Range1T& Separator,
    const basic_regex<CharT, RegexTraitsT>& Rx,
    match_flag_type Flags=match_default )
{
    // Working types: the joined string and an iterator over the segments
    typedef typename range_value<SequenceSequenceT>::type JoinedT;
    typedef typename range_const_iterator<SequenceSequenceT>::type SegmentIteratorT;

    SegmentIteratorT Segment=::boost::begin(Input);
    SegmentIteratorT Last=::boost::end(Input);

    JoinedT Joined;

    // Becomes true once the first matching segment has been appended;
    // from then on every further match is preceded by the separator.
    bool NeedSeparator=false;

    for(; Segment!=Last; ++Segment)
    {
        if(!::boost::regex_match(::boost::begin(*Segment), ::boost::end(*Segment), Rx, Flags))
        {
            // Segment rejected by the regex: skip it entirely
            continue;
        }

        if(NeedSeparator)
        {
            detail::insert(Joined, ::boost::end(Joined), ::boost::as_literal(Separator));
        }

        detail::insert(Joined, ::boost::end(Joined), *Segment);
        NeedSeparator=true;
    }

    return Joined;
}
#else // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
//! Conditional join algorithm
/*!
    Concatenate the strings of a 'list' into one long string, keeping only
    the segments that the regular expression matches in full
    (\c regex_match is applied to each whole segment). The separator is
    inserted between consecutive kept segments, never before the first
    or after the last one.
    This is a specialization of join_if algorithm, provided under a distinct
    name for configurations where BOOST_NO_FUNCTION_TEMPLATE_ORDERING is defined.

    \param Input A container that holds the input strings. It must be a container-of-containers.
    \param Separator A string that will separate the joined segments.
    \param Rx A regular expression
    \param Flags Regex options
    \return Concatenated string; empty when no segment matches.
    \note This function provides the strong exception-safety guarantee
*/
template<
    typename SequenceSequenceT,
    typename Range1T,
    typename CharT,
    typename RegexTraitsT >
inline typename range_value<SequenceSequenceT>::type
join_if_regex(
    const SequenceSequenceT& Input,
    const Range1T& Separator,
    const basic_regex<CharT, RegexTraitsT>& Rx,
    match_flag_type Flags=match_default )
{
    // Working types: the joined string and an iterator over the segments
    typedef typename range_value<SequenceSequenceT>::type JoinedT;
    typedef typename range_const_iterator<SequenceSequenceT>::type SegmentIteratorT;

    SegmentIteratorT Segment=::boost::begin(Input);
    SegmentIteratorT Last=::boost::end(Input);

    JoinedT Joined;

    // Becomes true once the first matching segment has been appended;
    // from then on every further match is preceded by the separator.
    bool NeedSeparator=false;

    for(; Segment!=Last; ++Segment)
    {
        if(!::boost::regex_match(::boost::begin(*Segment), ::boost::end(*Segment), Rx, Flags))
        {
            // Segment rejected by the regex: skip it entirely
            continue;
        }

        if(NeedSeparator)
        {
            detail::insert(Joined, ::boost::end(Joined), ::boost::as_literal(Separator));
        }

        detail::insert(Joined, ::boost::end(Joined), *Segment);
        NeedSeparator=true;
    }

    return Joined;
}
#endif // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
} // namespace algorithm
// Pull the regex string algorithm names from boost::algorithm into the
// boost namespace, so they can be called as e.g. boost::find_regex.
using algorithm::find_regex;
using algorithm::replace_regex;
using algorithm::replace_regex_copy;
using algorithm::replace_all_regex;
using algorithm::replace_all_regex_copy;
using algorithm::erase_regex;
using algorithm::erase_regex_copy;
using algorithm::erase_all_regex;
using algorithm::erase_all_regex_copy;
using algorithm::find_all_regex;
using algorithm::split_regex;
// Exactly one conditional-join name is exported, selected by the same
// BOOST_NO_FUNCTION_TEMPLATE_ORDERING switch that selects its definition.
#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
using algorithm::join_if;
#else // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
using algorithm::join_if_regex;
#endif // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
} // namespace boost