由于项目中用到了JSON数据结构,客户端采用VC编写,服务端数据接口采用PHP编写,遇到了JSON中文Unicode编码后,客户端出现了乱码的情形。
网上有一个方法比较好用,就是修改json_reader.cpp中的codePointToUTF8函数。
修改为以下:
// Converts the Unicode code point `cp` into the byte sequence stored in the
// returned string.
//
// Encoding rules:
//   - cp <= 0x7F                 : 1 byte.
//   - cp <= 0x7FF                : 2-byte UTF-8.
//   - cp in 0x4E00-0x9FA5 or
//     0xF900-0xFA2D              : multibyte form produced by wcstombs_s under
//                                  the "chs" locale (NOT UTF-8), so that a
//                                  client working in the local Chinese code
//                                  page displays the text correctly.
//   - any other cp <= 0xFFFF     : 3-byte UTF-8.
//   - cp <= 0x10FFFF             : 4-byte UTF-8.
//   - cp >  0x10FFFF             : empty string.
//
// If the "chs" locale cannot be selected or the conversion fails, the
// character is encoded as 3-byte UTF-8 instead of being silently dropped.
//
// NOTE: setlocale() changes the process-wide locale; the previous locale is
// restored before returning, but other threads may observe the temporary
// switch while this function runs.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
  JSONCPP_STRING result;
  // based on description from http://en.wikipedia.org/wiki/UTF-8
  if (cp <= 0x7f) {
    result.resize(1);
    result[0] = static_cast<char>(cp);
  } else if (cp <= 0x7FF) {
    result.resize(2);
    result[1] = static_cast<char>(0x80 | (0x3f & cp));
    result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
  } else if (cp <= 0xFFFF) {
    bool converted = false;
    if ((cp >= 0x4E00 && cp <= 0x9FA5) || (cp >= 0xF900 && cp <= 0xFA2D)) {
      wchar_t src[2] = { 0 };
      char dest[5] = { 0 };
      src[0] = static_cast<wchar_t>(cp);
      // Copy the current locale name before switching: the buffer returned by
      // setlocale may be overwritten by the next setlocale call. Guard against
      // a null return, which must not be passed to the std::string constructor.
      const char* current = setlocale(LC_ALL, NULL);
      const std::string savedLocale(current ? current : "");
      if (setlocale(LC_ALL, "chs") != NULL) {
        // wcstombs_s returns zero on success; dest stays empty when the
        // character has no representation in the target encoding.
        if (wcstombs_s(NULL, dest, 5, src, 2) == 0 && dest[0] != '\0') {
          result = dest;
          converted = true;
        }
        // Only restore when the locale was actually switched.
        if (!savedLocale.empty()) {
          setlocale(LC_ALL, savedLocale.c_str());
        }
      }
    }
    if (!converted) {
      // Regular 3-byte UTF-8 sequence (also the fallback for a failed
      // locale conversion above).
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
  } else if (cp <= 0x10FFFF) {
    result.resize(4);
    result[3] = static_cast<char>(0x80 | (0x3f & cp));
    result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
    result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
    result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
  }
  return result;
}
经过以上修改,中文确实能显示了,但是中文的标点符号仍为乱码。于是查找了中文标点的Unicode编码,只要在上面处理中文汉字的判断条件中,加入这些标点编码即可。
句号 0x3002 。
问号 0xFF1F ?
叹号 0xFF01 !
逗号 0xFF0C ,
顿号 0x3001 、
分号 0xFF1B ;
冒号 0xFF1A :
引号 0x300C 「
0x300D 」
引号 0x300E 『
0x300F 』
引号 0x2018 ‘
0x2019 ’
引号 0x201C “
0x201D ”
括号 0xFF08 (
0xFF09 )
括号 0x3014 〔
0x3015 〕
括号 0x3010 【
0x3011 】
破折号 0x2014 —
省略号 0x2026 …
连接号 0x2013 –
间隔号 0xFF0E .
书名号 0x300A 《
0x300B 》
书名号 0x3008 〈
0x3009 〉
最终修改后的函数为:
// Converts the Unicode code point `cp` into the byte sequence stored in the
// returned string.
//
// Encoding rules:
//   - cp <= 0x7F                 : 1 byte.
//   - cp <= 0x7FF                : 2-byte UTF-8.
//   - cp in 0x4E00-0x9FA5,
//     0xF900-0xFA2D, or one of
//     the listed Chinese
//     punctuation marks          : multibyte form produced by wcstombs_s under
//                                  the "chs" locale (NOT UTF-8), so that a
//                                  client working in the local Chinese code
//                                  page displays the text correctly.
//   - any other cp <= 0xFFFF     : 3-byte UTF-8.
//   - cp <= 0x10FFFF             : 4-byte UTF-8.
//   - cp >  0x10FFFF             : empty string.
//
// If the "chs" locale cannot be selected or the conversion fails, the
// character is encoded as 3-byte UTF-8 instead of being silently dropped.
//
// NOTE: setlocale() changes the process-wide locale; the previous locale is
// restored before returning, but other threads may observe the temporary
// switch while this function runs.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
  // Chinese punctuation marks that must go through the locale conversion,
  // in addition to the two Han ranges tested below.
  static const unsigned int kChinesePunctuation[] = {
    0x3002, 0xFF1F, 0xFF01, 0xFF0C, 0x3001, 0xFF1B, 0xFF1A, // 。 ? ! , 、 ; :
    0x300C, 0x300D, 0x300E, 0x300F,                         // 「 」 『 』
    0x2018, 0x2019, 0x201C, 0x201D,                         // ‘ ’ “ ”
    0xFF08, 0xFF09, 0x3014, 0x3015, 0x3010, 0x3011,         // ( ) 〔 〕 【 】
    0x2014, 0x2026, 0x2013, 0xFF0E,                         // — … – .
    0x300A, 0x300B, 0x3008, 0x3009                          // 《 》 〈 〉
  };
  const unsigned int kPunctuationCount =
      sizeof(kChinesePunctuation) / sizeof(kChinesePunctuation[0]);

  JSONCPP_STRING result;
  // based on description from http://en.wikipedia.org/wiki/UTF-8
  if (cp <= 0x7f) {
    result.resize(1);
    result[0] = static_cast<char>(cp);
  } else if (cp <= 0x7FF) {
    result.resize(2);
    result[1] = static_cast<char>(0x80 | (0x3f & cp));
    result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
  } else if (cp <= 0xFFFF) {
    // Decide whether this code point takes the locale-conversion path.
    bool useLocaleEncoding =
        (cp >= 0x4E00 && cp <= 0x9FA5) || (cp >= 0xF900 && cp <= 0xFA2D);
    for (unsigned int i = 0; !useLocaleEncoding && i < kPunctuationCount; ++i) {
      useLocaleEncoding = (kChinesePunctuation[i] == cp);
    }

    bool converted = false;
    if (useLocaleEncoding) {
      wchar_t src[2] = { 0 };
      char dest[5] = { 0 };
      src[0] = static_cast<wchar_t>(cp);
      // Copy the current locale name before switching: the buffer returned by
      // setlocale may be overwritten by the next setlocale call. Guard against
      // a null return, which must not be passed to the std::string constructor.
      const char* current = setlocale(LC_ALL, NULL);
      const std::string savedLocale(current ? current : "");
      if (setlocale(LC_ALL, "chs") != NULL) {
        // wcstombs_s returns zero on success; dest stays empty when the
        // character has no representation in the target encoding.
        if (wcstombs_s(NULL, dest, 5, src, 2) == 0 && dest[0] != '\0') {
          result = dest;
          converted = true;
        }
        // Only restore when the locale was actually switched.
        if (!savedLocale.empty()) {
          setlocale(LC_ALL, savedLocale.c_str());
        }
      }
    }
    if (!converted) {
      // Regular 3-byte UTF-8 sequence (also the fallback for a failed
      // locale conversion above).
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
  } else if (cp <= 0x10FFFF) {
    result.resize(4);
    result[3] = static_cast<char>(0x80 | (0x3f & cp));
    result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
    result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
    result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
  }
  return result;
}
即可完美解决中文乱码问题。