- /********************************************
- the tokenize function for std::string
- *********************************************/
- #include <string>
- #include <vector>
- #include <iostream>
- using namespace std;
// Index/size type used by std::string's search and substring members.
typedef std::string::size_type S_T;
// File-local "not found" sentinel; carries the same value as std::string::npos.
static const S_T npos = std::string::npos;
- trim指示是否丢弃分割后的空串:为true时丢弃空串;为false(默认)时保留空串,并用null_subst替代。
/**
 * Splits src at every character that occurs in tok.
 *
 * @param src        text to split; must not be empty
 * @param tok        set of single-character delimiters; must not be empty
 * @param trim       when true, empty tokens (produced by adjacent delimiters or by a
 *                   delimiter at either end of src) are dropped; when false (default)
 *                   each empty token is stored as null_subst
 * @param null_subst placeholder stored for an empty token when trim is false
 * @return the tokens in order of appearance
 * @throws const char* (a string literal) when src or tok is empty; handlers must
 *         catch `const char*`, a `char*` handler does not match
 */
std::vector<std::string> tokenize(const std::string& src, const std::string& tok,
                                  bool trim = false, const std::string& null_subst = "")
{
    if (src.empty() || tok.empty()) throw "tokenize: empty string\0";

    typedef std::string::size_type size_type;

    std::vector<std::string> tokens;
    size_type start = 0;
    for (;;)
    {
        const size_type hit = src.find_first_of(tok, start);
        // The text after the last delimiter is a token like any other, so treat
        // "no more delimiters" as a delimiter sitting at the end of src.
        const size_type stop = (hit == std::string::npos) ? src.size() : hit;

        if (stop != start)
            tokens.push_back(src.substr(start, stop - start));
        else if (!trim)
            tokens.push_back(null_subst);

        if (hit == std::string::npos) break;
        start = hit + 1;  // skip the single delimiter character
    }
    return tokens;
}
- 使用一个完整的串delimit(而不是其中的某个字符)来分割src串,没有trim选项,即严格分割。
/**
 * Splits src at every occurrence of the complete string delimit (not at its
 * individual characters). There is no trim option: every empty field is kept and
 * stored as null_subst.
 *
 * @param src        text to split; must not be empty
 * @param delimit    multi-character separator; must not be empty
 * @param null_subst placeholder stored for an empty field
 * @return the fields in order of appearance (always at least one element)
 * @throws const char* (a string literal) when src or delimit is empty; handlers must
 *         catch `const char*`, a `char*` handler does not match
 */
std::vector<std::string> split(const std::string& src, const std::string& delimit,
                               const std::string& null_subst = "")
{
    if (src.empty() || delimit.empty()) throw "split: empty string\0";

    // Positions stay in the string's own unsigned size_type: storing find()'s
    // result in a signed long truncates on platforms where long is narrower than
    // size_t and forces signed/unsigned comparisons against npos.
    typedef std::string::size_type size_type;

    std::vector<std::string> fields;
    size_type start = 0;
    for (;;)
    {
        const size_type hit = src.find(delimit, start);
        // The text after the last separator is a field too.
        const size_type stop = (hit == std::string::npos) ? src.size() : hit;

        if (stop == start)
            fields.push_back(null_subst);
        else
            fields.push_back(src.substr(start, stop - start));

        if (hit == std::string::npos) break;
        start = hit + delimit.size();  // resume right after the whole separator
    }
    return fields;
}
- // test
- int main(void)
- {
- string src = ",ab,cde;,,fg,," ;
- string tok = ",;" ;
- vector<string> v1 = tokenize(src, tok ,true);
- vector<string> v2 = tokenize(src, tok ,false, "<null>");
- cout<<"-------------v1:"<<endl;
- for(int i=0; i<v1.size();i++)
- {
- cout<<v1[i].c_str()<<endl;
- }
- cout<<"-------------v2:"<<endl;
- for(int j=0; j<v2.size();j++)
- {
- cout<<v2[j].c_str()<<endl;
- }
- try{
- string s = "######123#4###56########789###";
- string del = "";//"###";
- vector<string> v3 = split(s, del, "<null>");
- cout<<"-------------v3:"<<endl;
- for(int k=0; k<v3.size();k++)
- {
- cout<<v3[k].c_str()<<endl;
- }
- }
- catch (char *s) {
- cout<<s<<endl;
- }
- return 0;
- }
2、CString版的:
- #include <stdio.h>
- #include <afx.h>
- /*
- * 该函数用delimits里的字符拆分s,传出一个CStringList指针pList,
- * 若trim为真,则不保留分割后的空串(注意不是空白字符)。比如:
- * Tokenize( "a,bc;,d,", ",;", &out_list, TRUE)
- * 会返回3个串:a、bc、d。
- * 若trim为FALSE,则用nullSubst用来替代分割后的空串,比如:
- * Tokenize( "a,bc;,d;", ",;", &out_list, FALSE,"[null]" )
- * 会返回5个串:a、bc、[null]、d、[null]。
- * trim默认为FALSE,nullSubst默认为空串。
- */
- void Tokenize(CString s, CString delimits, CStringList* pList, BOOL trim=FALSE, CString nullSubst="")
- {
- ASSERT( !s.IsEmpty() && !delimits.IsEmpty() );
- s += delimits[0];
- for( long index=-1; (index=s.FindOneOf((LPCTSTR)delimits))!=-1; )
- {
- if(index != 0) pList->AddTail( s.Left(index) );
- else if(!trim) pList->AddTail(nullSubst);
- s = s.Right(s.GetLength()-index-1);
- }
- }
- /*
- * 类似java字符串的split()方法。
- * 使用一个完整的串delimit(而不是其中的某个字符)来分割src串,没有trim选项,
- * 即严格分割。num用来确定最多分割为多少个串,如果是0(默认),则按照delimit
- * 分割,若为1,则返回源串。
- */
- void Split(const CString& src, CString delimit, CStringList* pOutList, int num=0, CString nullSubst="")
- {
- ASSERT( !src.IsEmpty() && !delimit.IsEmpty() );
- if(num==1)
- {
- pOutList->AddTail(src);
- return;
- }
- int deliLen = delimit.GetLength();
- long index = -1, lastSearchPosition = 0, cnt = 0;
- while( (index=src.Find(delimit, lastSearchPosition))!=-1 )
- {
- if(index==lastSearchPosition)
- pOutList->AddTail(nullSubst);
- else
- pOutList->AddTail(src.Mid(lastSearchPosition, index-lastSearchPosition));
- lastSearchPosition = index + deliLen;
- if(num)
- {
- ++cnt;
- if(cnt+1==num) break;
- }
- }
- CString lastOne = src.Mid(lastSearchPosition);
- pOutList->AddTail( lastOne.IsEmpty()? nullSubst:lastOne);
- }
- // test
- int main(void)
- {
- CString s = ",ab;cde,f,,;gh,,";
- CString sub = ",;";
- CStringList list1,list2;
- Tokenize(s,sub,&list1,TRUE,"no use"); // <-----
- printf("-------[Tokenize_trim]-------\n");
- POSITION pos1 = list1.GetHeadPosition();
- while( pos1!= NULL )
- {
- printf( list1.GetNext(pos1) );
- printf("\n");
- }
- Tokenize(s,sub,&list2,FALSE,"[null]"); // <-----
- printf("------[Tokenize_no_trim]-----\n");
- POSITION pos2 = list2.GetHeadPosition();
- while( pos2!= NULL )
- {
- printf( list2.GetNext(pos2) );
- printf("\n");
- }
- CStringList list3;
- s = "###0123###567######89###1000###";
- sub = "###";
- Split(s,sub,&list3, 3, "<null>"); // <-----
- printf("------[Split]-----\n");
- POSITION pos3 = list3.GetHeadPosition();
- while( pos3!= NULL )
- {
- printf( list3.GetNext(pos3) );
- printf("\n");
- }
- return 0;
- }
标准C++字符串string以及MFC6.0字符串CString的tokenize和split函数。
1、标准串的:
转载于:https://blog.51cto.com/vopit/495515