boost在路上...tokenizer

最新推荐文章于 2023-01-03 17:13:32 发布

zuroc

最新推荐文章于 2023-01-03 17:13:32 发布

阅读量90

点赞数

分类专栏： C++ 文章标签： F#

C++ 专栏收录该内容

34 篇文章 0 订阅

订阅专栏

boost在路上...tokenizer
tokenizer - Break of a string or other character sequence into a series of tokens, from John Bandela
tokenizer - 分解字串,提取内容.作者: John Bandela

例一:
// simple_example_1.cpp
#include<iostream>
#include<boost>
#include<string></string></boost></iostream>

int main(){
   using namespace std;
   using namespace boost;
   string s = "This is, a test";
   tokenizer<> tok(s);
   for(tokenizer<>::iterator beg=tok.begin(); beg!=tok.end();++beg){
       cout << *beg << "\n";
   }
}

输出
This
is
a
test

tokenizer默认将单词以空格和标点为边界分开.

例二:
#include<iostream>
#include<boost>
#include<string></string></boost></iostream>

int main(){
   using namespace std;
   using namespace boost;
   string s = "Field 1,\"putting quotes around fields, allows commas\",Field 3";
   tokenizer<escaped_list_separator><char> > tok(s);
   for(tokenizer<escaped_list_separator><char> >::iterator beg=tok.begin(); beg!=tok.end();++beg){
       cout << *beg << "\n";
   }
}
输出
Field 1
putting quotes around fields, allows commas
Field 3

双引号之间可以有标点.

例三:
// simple_example_3.cpp
#include<iostream>
#include<boost>
#include<string></string></boost></iostream>

int main(){
   using namespace std;
   using namespace boost;
   string s = "12252001";
   int offsets[] = {2,2,4};
   offset_separator f(offsets, offsets+3);
   tokenizer<offset_separator> tok(s,f);
   for(tokenizer<offset_separator>::iterator beg=tok.begin(); beg!=tok.end();++beg){
       cout << *beg << "\n";
   }
}</offset_separator></offset_separator>

把12252001分解为
12
25
2001

例4:
// char_sep_example_1.cpp
#include <iostream>
#include <boost>
#include <string></string></boost></iostream>

int main()
{
std::string str = ";!!;Hello|world||-foo--bar;yow;baz|";
typedef boost::tokenizer<boost::char_separator><char> >
    tokenizer;
boost::char_separator<char> sep("-;|");
tokenizer tokens(str, sep);
for (tokenizer::iterator tok_iter = tokens.begin();
       tok_iter != tokens.end(); ++tok_iter)
    std::cout << "<" << *tok_iter << "> ";
std::cout << "\n";
return EXIT_SUCCESS;
}</char></char></boost::char_separator>

输出
<Hello> <world> <foo> <bar> <yow> <baz>
自定义分隔的标点

例5:
    // char_sep_example_2.cpp
    #include <iostream>
    #include <boost>
    #include <string></string></boost></iostream>

    int main()
    {
        std::string str = ";;Hello|world||-foo--bar;yow;baz|";
        typedef boost::tokenizer<boost::char_separator><char> >
            tokenizer;
        boost::char_separator<char> sep("-;", "|", boost::keep_empty_tokens);
        tokenizer tokens(str, sep);
        for (tokenizer::iterator tok_iter = tokens.begin();
             tok_iter != tokens.end(); ++tok_iter)
          std::cout << "<" << *tok_iter << "> ";
        std::cout << "\n";
        return EXIT_SUCCESS;
    }</char></char></boost::char_separator>

The output is:

<> <> <Hello> <|> <world> <|> <> <|> <> <foo> <> <bar> <yow> <baz> <|> <>

例6:
    // char_sep_example_3.cpp
    #include <iostream>
    #include <boost>
    #include <string></string></boost></iostream>

    int main()
    {
       std::string str = "This is, a test";
       typedef boost::tokenizer<boost::char_separator><char> > Tok;
       boost::char_separator<char> sep; // default constructed
       Tok tok(str, sep);
       for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter)
         std::cout << "<" << *tok_iter << "> ";
       std::cout << "\n";
       return EXIT_SUCCESS;
    }</char></char></boost::char_separator>

The output is:

<This> <is> <,> <a> <test>
保留标点但将它看作分隔符

zuroc

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
boost在路上...tokenizer

boost在路上...tokenizertokenizer - Break of a string or other character sequence into a series of tokens, from John Bandelatokenizer - 分解字串,提取内容.作者: John Bandela例一:// simple_example_1.cpp#include&lt...
复制链接

扫一扫

专栏目录