/*
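UTF-8 byte-sequence formats (x = payload bit).
Note: the 5- and 6-byte forms come from the original UTF-8 definition;
RFC 3629 restricts valid UTF-8 to sequences of at most 4 bytes.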
0xxxxxxx
110xxxxx 10xxxxxx
1110xxxx 10xxxxxx 10xxxxxx
11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
std::string filter_none_utf8_chars(const std::string &src)
{
std::stringstream ss;
for (int i = 0; i < src.length(); )
{
char chr = src[i];
int bytes=0;
if ((chr & 0x80) == 0)
{
ss<<src[i];
i++;
continue;
}
if ((chr & 0xE0) == 0xC0) //110x xxxx
{
bytes = 2;
}
else if ((chr & 0xF0) == 0xE0) //1110 xxxx
{
bytes = 3;
}
else if ((chr & 0xF8) == 0xF0) //1111 0xxx
{
bytes = 4;
}
else if ((chr & 0xFC) == 0xF8) //1111 10xx
{
bytes =
过滤非utf-8字符
最新推荐文章于 2022-11-04 17:26:35 发布
![](https://img-home.csdnimg.cn/images/20240711042549.png)