功能:将字符串(包括汉字)分割成一个个单元,每个汉字单独作为一个单元;
空格和分号各自单独作为一个单元;其余连续的非汉字字符合并为一个单元。
#include <string.h>
#include <string>
#include <vector>
#include <iostream>
using namespace std;

// Returns true for the two ASCII characters that always form a unit of
// their own: the space and the semicolon.
static bool isUnitDelimiter(char c)
{
    return c == ' ' || c == ';';
}

// Returns how many bytes of the buffer starting at `p` belong to the
// multi-byte character introduced by p[0].
//
// The expected length is taken from the lead byte (110xxxxx -> 2,
// 1110xxxx -> 3, 11110xxx -> 4). Only bytes that really are continuation
// bytes (10xxxxxx) and that lie inside `remaining` are counted, so a
// truncated or malformed sequence never makes the caller read past the
// end of the input. A stray continuation byte or an invalid lead byte
// yields 1.
static size_t utf8SequenceLength(const char *p, size_t remaining)
{
    const unsigned char lead = static_cast<unsigned char>(p[0]);

    size_t expected = 1;
    if ((lead & 0xE0) == 0xC0)
        expected = 2;
    else if ((lead & 0xF0) == 0xE0)
        expected = 3;
    else if ((lead & 0xF8) == 0xF0)
        expected = 4;

    size_t length = 1;
    while (length < expected && length < remaining
           && (static_cast<unsigned char>(p[length]) & 0xC0) == 0x80)
        ++length;
    return length;
}

// Splits the NUL-terminated UTF-8 string `pString` into units and appends
// them, in order, to `vecChars` (existing elements are left untouched).
//
// Units are formed as follows:
//   * each multi-byte character (e.g. a Chinese character) is one unit;
//   * each space and each ';' is one unit;
//   * every maximal run of other single-byte characters is one unit.
//
// A null `pString` is treated like an empty string.
void splitUtf8String(const char *pString, vector<string>& vecChars)
{
    if (!pString)
        return;

    // Measure once; calling strlen in the loop conditions is quadratic.
    const size_t total = strlen(pString);

    size_t i = 0;
    while (i < total) {
        const char current = pString[i];
        size_t length = 1;

        if (static_cast<unsigned char>(current) & 0x80) {
            // Multi-byte character: length depends on the lead byte.
            length = utf8SequenceLength(pString + i, total - i);
        }
        else if (!isUnitDelimiter(current)) {
            // Extend over the run of plain single-byte characters.
            while (i + length < total
                   && !(static_cast<unsigned char>(pString[i + length]) & 0x80)
                   && !isUnitDelimiter(pString[i + length]))
                ++length;
        }
        // Delimiters fall through with length == 1.

        vecChars.push_back(string(pString + i, length));
        i += length;
    }
}

// Command-line driver: splits argv[1] and prints one unit per line.
// Define SPLIT_UTF8_NO_MAIN to compile splitUtf8String without the driver
// (for example when including this file in a unit test).
#ifndef SPLIT_UTF8_NO_MAIN
int main(int argc, char **argv)
{
    if (argc != 2) {
        cerr << "error,please enter something to be split" << endl;
        return 1;
    }

    vector<string> result;
    splitUtf8String(argv[1], result);
    for (vector<string>::iterator it = result.begin(); it != result.end(); ++it)
        cout << *it << '\n';

    return 0; // success
}
#endif