有这样一个场景:字符串超出一定长度时需要截断。由于 std::string 按字节存储,直接调用 substr 可能会把一个 UTF-8 多字节字符从中间截断,导致客户端显示异常。因此写了一个在 UTF-8 编码下按字符数进行截断的函数:
#include <iostream>
#include <string>
using std::cout;
using std::endl;
// Returns the leading part of `name` that holds at most `need` UTF-8
// characters, cutting only on a character boundary.
//
// A "character" is counted for every byte that is not a UTF-8 continuation
// byte (bit pattern 10xxxxxx). The input is not validated: continuation
// bytes are simply attached to whatever precedes them, so stray continuation
// bytes at the very start of `name` are kept even when `need` is 0.
//
// name: byte string to truncate.
// need: maximum number of characters to keep.
// Returns a copy of the selected prefix; the whole string when it contains
//         `need` characters or fewer.
std::string Utf8SubStr(const std::string&name, size_t need) {
    const size_t total = name.length();
    size_t end = 0;

    // Consume bytes until `need` character-start bytes have been passed.
    for (size_t counted = 0; counted < need && end < total; ++end) {
        const unsigned char byte = static_cast<unsigned char>(name[end]);
        if ((byte & 0xc0) != 0x80) {
            ++counted;
        }
    }

    // The scan above stops right after a lead byte; absorb the continuation
    // bytes that follow so the last character is not split.
    while (end < total &&
           (static_cast<unsigned char>(name[end]) & 0xc0) == 0x80) {
        ++end;
    }

    return name.substr(0, end);
}
// Demo entry point: truncates a sample string mixing ASCII and multi-byte
// characters to 2 characters with Utf8SubStr and prints the result followed
// by a newline. The command-line arguments are not used.
int main(int argc, char *argv[]) {
    const std::string sample("h你好");
    const std::string truncated = Utf8SubStr(sample, 2);
    std::cout << truncated << std::endl;
    return 0;
}