一、wcstombs
使用函数 wcstombs() 前,需要先调用 setlocale() 设置区域;如果系统没有安装 zh_CN.utf8,setlocale(LC_ALL, "zh_CN.utf8") 会失败(返回 NULL),随后 wcstombs() 转码也会失败。
// Example: convert a wide string to a multibyte string with wcstombs().
// Select the zh_CN.utf8 locale; the result is stored in locname but is not
// checked in this snippet (setlocale() returns NULL when the locale is unavailable).
char* locname = setlocale(LC_ALL, "zh_CN.utf8");
// Zero-initialised output buffer for the converted bytes.
char utf8_confname[MAX_CN_LEN] = { 0 };
//int utflen = wcstombs(NULL, info.conf_name.c_str(), 0); // computes the length after conversion
wstring sss = L"中文test";
// Convert sss into utf8_confname, writing at most MAX_CN_LEN bytes.
// NOTE(review): wcstombs() returns size_t and reports failure as (size_t)-1;
// the result is stored in an int here and is not inspected.
int ret = wcstombs(utf8_confname, sss.c_str(), MAX_CN_LEN);
可以用 locale -a 命令查看当前系统已安装的 locale(区域/字符编码)。
当列表中没有 zh_CN.utf8 时,需要先安装中文语言包:apt-get install -y language-pack-zh-hans
打印字符串的16进制编码:
// Example: print the hexadecimal value of every element of the wide string,
// of the converted byte buffer, and of info.conf_name (declared elsewhere).
wstring sss = L"中文test";
int ret = wcstombs(utf8_confname, sss.c_str(), MAX_CN_LEN);
// One printf per wide character of sss.
for (int i = 0; i < (int)sss.length(); ++i)
{
printf("L-中文test:0x%x", sss[i]);
}
// Iterates over the whole buffer (sizeof), not only the bytes that were converted.
// NOTE(review): each element is a plain char passed to %x; the 0xffffff.. values
// in the sample output suggest char is signed on the platform used, so bytes
// >= 0x80 are sign-extended. Casting to unsigned char would print e.g. 0xe4.
for (int i = 0; i < (int)sizeof(utf8_confname); ++i)
{
printf("utf8_confname[中文test]: 0x%x", utf8_confname[i]);
}
// Same dump for every element of info.conf_name.
for (int i = 0; i < (int)info.conf_name.length(); ++i)
{
printf("info.conf_name:0x%x", info.conf_name[i]);
}
L-中文test:0x4e2d
L-中文test:0x6587
L-中文test:0x74
L-中文test:0x65
L-中文test:0x73
L-中文test:0x74
utf8_confname[中文test]: 0xffffffe4
utf8_confname[中文test]: 0xffffffb8
utf8_confname[中文test]: 0xffffffad
utf8_confname[中文test]: 0xffffffe6
utf8_confname[中文test]: 0xffffff96
utf8_confname[中文test]: 0xffffff87
二、按UTF8编码规则转换
/// @brief Decodes a UTF-8 encoded byte string into a wide string.
///
/// Sequences of 1 to 4 bytes are decoded as defined by RFC 3629. Each byte
/// that cannot start or continue a well-formed sequence (a stray continuation
/// byte, a lead byte 0xF8..0xFF, a lead byte followed by a non-continuation
/// byte, or a value above U+10FFFF) is replaced by U+FFFD and decoding resumes
/// at the next byte, so one bad byte never desynchronises the rest of the input.
///
/// When wchar_t is narrower than 32 bits, code points above U+FFFF are written
/// as a UTF-16 surrogate pair instead of being truncated.
///
/// @param utf8 UTF-8 encoded input; may be empty.
/// @return The decoded wide string. If the input ends in the middle of a
///         multi-byte sequence, decoding stops there and the incomplete tail
///         is dropped.
inline std::wstring UTF8ToUnicode(std::string const& utf8)
{
    const char32_t kReplacement = 0xFFFD;

    std::wstring res;
    res.reserve(utf8.size());

    for (size_t i = 0; i < utf8.size();)
    {
        const unsigned char lead = static_cast<unsigned char>(utf8[i]);

        // Classify the lead byte: sequence length and the payload bits it carries.
        // Anything that matches no pattern keeps length 1 and the replacement value.
        size_t length = 1;
        char32_t code_point = kReplacement;
        if ((lead & 0x80) == 0x00)        ///< 0xxx-xxxx
        {
            code_point = lead;
        }
        else if ((lead & 0xE0) == 0xC0)   ///< 110x-xxxx 10xx-xxxx
        {
            length = 2;
            code_point = static_cast<char32_t>(lead & 0x1F);
        }
        else if ((lead & 0xF0) == 0xE0)   ///< 1110-xxxx 10xx-xxxx 10xx-xxxx
        {
            length = 3;
            code_point = static_cast<char32_t>(lead & 0x0F);
        }
        else if ((lead & 0xF8) == 0xF0)   ///< 1111-0xxx 10xx-xxxx 10xx-xxxx 10xx-xxxx
        {
            length = 4;
            code_point = static_cast<char32_t>(lead & 0x07);
        }

        // Input ends inside a multi-byte sequence: stop decoding.
        if (length > utf8.size() - i)
            break;

        // Fold in the continuation bytes, verifying each one is 10xx-xxxx.
        bool well_formed = true;
        for (size_t k = 1; k < length; ++k)
        {
            const unsigned char next = static_cast<unsigned char>(utf8[i + k]);
            if ((next & 0xC0) != 0x80)
            {
                well_formed = false;
                break;
            }
            code_point = static_cast<char32_t>((code_point << 6) | (next & 0x3F));
        }

        if (!well_formed || code_point > 0x10FFFF)
        {
            // Consume only the offending lead byte so the following bytes are
            // re-examined on their own.
            code_point = kReplacement;
            length = 1;
        }

        if (sizeof(wchar_t) < 4 && code_point > 0xFFFF)
        {
            // 16-bit wchar_t: encode as a UTF-16 surrogate pair.
            const char32_t offset = code_point - 0x10000;
            res.push_back(static_cast<wchar_t>(0xD800 + (offset >> 10)));
            res.push_back(static_cast<wchar_t>(0xDC00 + (offset & 0x3FF)));
        }
        else
        {
            res.push_back(static_cast<wchar_t>(code_point));
        }

        i += length;
    }
    return res;
}
三、其他
使用 mysql++ 库向数据库写入中文时会出现乱码,先转成 UTF-8 编码再写入也仍然是乱码;当时的结论是 mysql++ 库不支持写入中文(尚未验证是否与连接字符集设置有关,例如连接后是否执行过 SET NAMES utf8)。
查看汉字的编码链接:查看字符编码(UTF-8) (mytju.com)