jsoncpp Unicode转中文解决方法
之前在调用百度API进行图像识别时遇到了中文显示的问题,写个博客记录下解决方法
jsoncpp 的 toStyledString() 在序列化含中文的字符串时不会直接输出中文字符,而是输出“\u5e73\u548c\u671b”这样的 Unicode 转义序列。我把网上找到的两个函数整合了一下,就可以把这些转义序列转回中文。
decodeUnicode函数
/// Reads exactly four hexadecimal digits starting at text[pos].
///
/// @param text   String being scanned.
/// @param pos    Index of the first expected hex digit.
/// @param value  Receives the parsed number; left untouched on failure.
/// @return true only when four characters exist at pos and all are hex digits.
static bool readFourHexDigits(const string& text, string::size_type pos, unsigned int& value) {
    if (pos > text.length() || text.length() - pos < 4)
        return false;
    unsigned int parsed = 0;
    for (string::size_type i = 0; i < 4; ++i) {
        const char c = text[pos + i];
        unsigned int digit = 0;
        if (c >= '0' && c <= '9')
            digit = static_cast<unsigned int>(c - '0');
        else if (c >= 'a' && c <= 'f')
            digit = 10u + static_cast<unsigned int>(c - 'a');
        else if (c >= 'A' && c <= 'F')
            digit = 10u + static_cast<unsigned int>(c - 'A');
        else
            return false;
        parsed = (parsed << 4) | digit;
    }
    value = parsed;
    return true;
}

/// Replaces backslash escapes in theString with the characters they denote.
///
/// - "\uXXXX" (four hex digits) is parsed into a code point, passed to
///   codePointToUTF8(), and the bytes that function returns are appended.
///   A high surrogate immediately followed by a "\uXXXX" low surrogate is
///   combined into one supplementary code point before encoding.
/// - "\t", "\r", "\n", "\f" and "\b" become the matching control character.
/// - Any other "\c" becomes just "c" (so "\\" -> "\" and "\"" -> """).
/// - A malformed or truncated "\u" escape, and a lone backslash at the very
///   end of the input, are copied through unchanged instead of reading past
///   the end of the string.
///
/// @param theString  Text that may contain backslash escapes.
/// @return The text with all recognised escapes expanded.
string decodeUnicode(string theString) {
    const string::size_type len = theString.length();
    string decoded;
    decoded.reserve(len);

    string::size_type x = 0;
    while (x < len) {
        char aChar = theString[x++];
        if (aChar != '\\' || x >= len) {
            // Ordinary character, or a backslash with nothing after it.
            decoded.push_back(aChar);
            continue;
        }

        aChar = theString[x++];
        if (aChar == 'u') {
            unsigned int value = 0;
            if (!readFourHexDigits(theString, x, value)) {
                // Not a valid \uXXXX: keep the two characters and let the
                // loop handle whatever follows as ordinary text.
                decoded.push_back('\\');
                decoded.push_back('u');
                continue;
            }
            x += 4;

            // Characters outside the BMP are written as a surrogate pair
            // (\uD800-\uDBFF followed by \uDC00-\uDFFF); merge the pair so it
            // is encoded as one code point rather than two invalid ones.
            const bool isHighSurrogate = value >= 0xD800 && value <= 0xDBFF;
            if (isHighSurrogate && len - x >= 6 &&
                theString[x] == '\\' && theString[x + 1] == 'u') {
                unsigned int low = 0;
                if (readFourHexDigits(theString, x + 2, low) &&
                    low >= 0xDC00 && low <= 0xDFFF) {
                    value = 0x10000 + ((value - 0xD800) << 10) + (low - 0xDC00);
                    x += 6;
                }
            }

            const string encoded = codePointToUTF8(value);
            decoded.append(encoded.begin(), encoded.end());
        }
        else {
            switch (aChar) {
            case 't': aChar = '\t'; break;
            case 'r': aChar = '\r'; break;
            case 'n': aChar = '\n'; break;
            case 'f': aChar = '\f'; break;
            case 'b': aChar = '\b'; break;
            default:  break;  // drop the backslash, keep the character
            }
            decoded.push_back(aChar);
        }
    }
    return decoded;
}
codePointToUTF8()
/// Encodes a Unicode code point as a byte string.
///
/// Despite the name, the result is not always UTF-8: code points in
/// U+4E00..U+9FA5 and U+F900..U+FA2D are converted with wcstombs_s() after
/// switching the C locale to "chs", so they come back in that locale's
/// multibyte encoding. If that conversion fails, the code point falls back to
/// regular 3-byte UTF-8 instead of being dropped. All other code points up to
/// U+10FFFF are encoded as 1- to 4-byte UTF-8.
///
/// @param cp  Code point to encode.
/// @return The encoded bytes; empty when cp is greater than 0x10FFFF.
///
/// @note The CJK branch calls setlocale(LC_ALL, ...) and restores the previous
///       locale afterwards, so the C locale is temporarily changed while it
///       runs.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
  JSONCPP_STRING result;
  // based on description from http://en.wikipedia.org/wiki/UTF-8
  if (cp <= 0x7f) {
    result.resize(1);
    result[0] = static_cast<char>(cp);
  }
  else if (cp <= 0x7FF) {
    result.resize(2);
    result[1] = static_cast<char>(0x80 | (0x3f & cp));
    result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
  }
  else if (cp <= 0xFFFF) {
    const bool inHanRange = (cp >= 0x4E00 && cp <= 0x9FA5) ||
                            (cp >= 0xF900 && cp <= 0xFA2D);
    if (inHanRange) {
      // Convert the single wide character to the "chs" locale's multibyte
      // form so it can be printed directly as Chinese text.
      wchar_t src[2] = { 0 };
      char dest[5] = { 0 };
      src[0] = static_cast<wchar_t>(cp);

      // setlocale() may return NULL; building a std::string from NULL is
      // undefined, so remember "no previous locale" as an empty string.
      const char* previous = setlocale(LC_ALL, NULL);
      const std::string curLocale = previous ? previous : "";

      setlocale(LC_ALL, "chs");
      const bool converted =
          wcstombs_s(NULL, dest, 5, src, 2) == 0 && dest[0] != '\0';
      if (!curLocale.empty())
        setlocale(LC_ALL, curLocale.c_str());

      if (converted)
        result = dest;
    }
    if (result.empty()) {
      // Outside the special ranges, or the locale conversion failed.
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
  }
  else if (cp <= 0x10FFFF) {
    result.resize(4);
    result[3] = static_cast<char>(0x80 | (0x3f & cp));
    result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
    result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
    result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
  }
  return result;
}
decodeUnicode() 函数把 \u5e73 中 \u 后面的四位十六进制数字解析为一个整数码点,存放在 value(unsigned int 类型)中,然后调用 codePointToUTF8();codePointToUTF8() 再把该码点编码成 string 返回(常用汉字区间 0x4E00–0x9FA5、0xF900–0xFA2D 会在 "chs" 区域设置下转成本地多字节编码,其余码点按 UTF-8 编码)。
另外,若直接打印 result["result"][1]["root"].asString() 的返回值,中文会显示成乱码。这是因为 jsoncpp 返回的字符串是 UTF-8 编码的,需要先用 UTF8ToGB() 函数转为 GBK 编码再打印。
参考博客
https://blog.csdn.net/xiaohu_2012/article/details/14454299.
//unicodetoChinese
https://blog.csdn.net/YAOJINGKAO/article/details/52222552?utm_source=blogxgwz0&utm_medium=distribute.pc_relevant.none-task-blog-title-3&spm=1001.2101.3001.4242.
//codePointToUTF8()
https://blog.csdn.net/xiaohu_2012/article/details/14454299
// UTF8ToGB()
//UnicodeToChinese()忘记出处在哪儿了,找到了补上 -_-ll
小白的第一篇博客,如有不足望指正^_^
完整代码如下:
#include <iostream>
#include <json/json.h>
#include <string>
#include <atlstr.h>
#include "image_classify.h"
#include <vector>
using namespace std;
/// Encodes a Unicode code point as a byte string.
///
/// Despite the name, the result is not always UTF-8: code points in
/// U+4E00..U+9FA5 and U+F900..U+FA2D are converted with wcstombs_s() after
/// switching the C locale to "chs", so they come back in that locale's
/// multibyte encoding. If that conversion fails, the code point falls back to
/// regular 3-byte UTF-8 instead of being dropped. All other code points up to
/// U+10FFFF are encoded as 1- to 4-byte UTF-8.
///
/// @param cp  Code point to encode.
/// @return The encoded bytes; empty when cp is greater than 0x10FFFF.
///
/// @note The CJK branch calls setlocale(LC_ALL, ...) and restores the previous
///       locale afterwards, so the C locale is temporarily changed while it
///       runs.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
  JSONCPP_STRING result;
  // based on description from http://en.wikipedia.org/wiki/UTF-8
  if (cp <= 0x7f) {
    result.resize(1);
    result[0] = static_cast<char>(cp);
  }
  else if (cp <= 0x7FF) {
    result.resize(2);
    result[1] = static_cast<char>(0x80 | (0x3f & cp));
    result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
  }
  else if (cp <= 0xFFFF) {
    const bool inHanRange = (cp >= 0x4E00 && cp <= 0x9FA5) ||
                            (cp >= 0xF900 && cp <= 0xFA2D);
    if (inHanRange) {
      // Convert the single wide character to the "chs" locale's multibyte
      // form so it can be printed directly as Chinese text.
      wchar_t src[2] = { 0 };
      char dest[5] = { 0 };
      src[0] = static_cast<wchar_t>(cp);

      // setlocale() may return NULL; building a std::string from NULL is
      // undefined, so remember "no previous locale" as an empty string.
      const char* previous = setlocale(LC_ALL, NULL);
      const std::string curLocale = previous ? previous : "";

      setlocale(LC_ALL, "chs");
      const bool converted =
          wcstombs_s(NULL, dest, 5, src, 2) == 0 && dest[0] != '\0';
      if (!curLocale.empty())
        setlocale(LC_ALL, curLocale.c_str());

      if (converted)
        result = dest;
    }
    if (result.empty()) {
      // Outside the special ranges, or the locale conversion failed.
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
  }
  else if (cp <= 0x10FFFF) {
    result.resize(4);
    result[3] = static_cast<char>(0x80 | (0x3f & cp));
    result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
    result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
    result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
  }
  return result;
}
/// Reads exactly four hexadecimal digits starting at text[pos].
///
/// @param text   String being scanned.
/// @param pos    Index of the first expected hex digit.
/// @param value  Receives the parsed number; left untouched on failure.
/// @return true only when four characters exist at pos and all are hex digits.
static bool readFourHexDigits(const string& text, string::size_type pos, unsigned int& value) {
    if (pos > text.length() || text.length() - pos < 4)
        return false;
    unsigned int parsed = 0;
    for (string::size_type i = 0; i < 4; ++i) {
        const char c = text[pos + i];
        unsigned int digit = 0;
        if (c >= '0' && c <= '9')
            digit = static_cast<unsigned int>(c - '0');
        else if (c >= 'a' && c <= 'f')
            digit = 10u + static_cast<unsigned int>(c - 'a');
        else if (c >= 'A' && c <= 'F')
            digit = 10u + static_cast<unsigned int>(c - 'A');
        else
            return false;
        parsed = (parsed << 4) | digit;
    }
    value = parsed;
    return true;
}

/// Replaces backslash escapes in theString with the characters they denote.
///
/// - "\uXXXX" (four hex digits) is parsed into a code point, passed to
///   codePointToUTF8(), and the bytes that function returns are appended.
///   A high surrogate immediately followed by a "\uXXXX" low surrogate is
///   combined into one supplementary code point before encoding.
/// - "\t", "\r", "\n", "\f" and "\b" become the matching control character.
/// - Any other "\c" becomes just "c" (so "\\" -> "\" and "\"" -> """).
/// - A malformed or truncated "\u" escape, and a lone backslash at the very
///   end of the input, are copied through unchanged instead of reading past
///   the end of the string.
///
/// @param theString  Text that may contain backslash escapes.
/// @return The text with all recognised escapes expanded.
string decodeUnicode(string theString) {
    const string::size_type len = theString.length();
    string decoded;
    decoded.reserve(len);

    string::size_type x = 0;
    while (x < len) {
        char aChar = theString[x++];
        if (aChar != '\\' || x >= len) {
            // Ordinary character, or a backslash with nothing after it.
            decoded.push_back(aChar);
            continue;
        }

        aChar = theString[x++];
        if (aChar == 'u') {
            unsigned int value = 0;
            if (!readFourHexDigits(theString, x, value)) {
                // Not a valid \uXXXX: keep the two characters and let the
                // loop handle whatever follows as ordinary text.
                decoded.push_back('\\');
                decoded.push_back('u');
                continue;
            }
            x += 4;

            // Characters outside the BMP are written as a surrogate pair
            // (\uD800-\uDBFF followed by \uDC00-\uDFFF); merge the pair so it
            // is encoded as one code point rather than two invalid ones.
            const bool isHighSurrogate = value >= 0xD800 && value <= 0xDBFF;
            if (isHighSurrogate && len - x >= 6 &&
                theString[x] == '\\' && theString[x + 1] == 'u') {
                unsigned int low = 0;
                if (readFourHexDigits(theString, x + 2, low) &&
                    low >= 0xDC00 && low <= 0xDFFF) {
                    value = 0x10000 + ((value - 0xD800) << 10) + (low - 0xDC00);
                    x += 6;
                }
            }

            const string encoded = codePointToUTF8(value);
            decoded.append(encoded.begin(), encoded.end());
        }
        else {
            switch (aChar) {
            case 't': aChar = '\t'; break;
            case 'r': aChar = '\r'; break;
            case 'n': aChar = '\n'; break;
            case 'f': aChar = '\f'; break;
            case 'b': aChar = '\b'; break;
            default:  break;  // drop the backslash, keep the character
            }
            decoded.push_back(aChar);
        }
    }
    return decoded;
}
//UnicodeToChinese()这个函数代码里没有用到,可将'\u2132'转为对于的CString中文
bool UnicodeToChinese(string str, CString& cstr)
{
int i = 0;
int j = 0;
int len = 0;
len = str.length();
if (len <= 0)
{
return false;
}
int nValue = 0;
WCHAR* pWchar;
wchar_t* szHex;
char strchar[6] = { '0','x','\0' };
for (i = 0; i < len; i++)
{
if (str[i] == 'u')
{
for (j = 2; j < 6; j++)
{
i++;
strchar[j] = str[i];
}
USES_CONVERSION;
szHex = A2W(strchar);
StrToIntExW(szHex, STIF_SUPPORT_HEX, &nValue);
pWchar = (WCHAR*)&nValue;
cstr = cstr + pWchar;
}
}
return true;
}
string UTF8ToGB(const char* str)
{
string result;
WCHAR* strSrc;
LPSTR szRes;
int i = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
strSrc = new WCHAR[i + 1];
MultiByteToWideChar(CP_UTF8, 0, str, -1, strSrc, i);
i = WideCharToMultiByte(CP_ACP, 0, strSrc, -1, NULL, 0, NULL, NULL);
szRes = new CHAR[i + 1];
WideCharToMultiByte(CP_ACP, 0, strSrc, -1, szRes, i, NULL, NULL);
result = szRes;
delete[]strSrc;
delete[]szRes;
return result;
}
/// Demo: sends an image to the Baidu image-classification client, then prints
/// the whole response with its \uXXXX escapes decoded, followed by one field
/// of the response converted with UTF8ToGB().
int main() {
    // Credentials for the service (placeholders).
    std::string app_id = "xxxxxxx";
    std::string api_key = "xxxxxxxxx";
    std::string secret_key = "xxxxxxxxxxxxxxx";
    aip::Imageclassify client(app_id, api_key, secret_key);

    // Load the image bytes from disk.
    std::string image;
    aip::get_file_content("E:/智子.jpg", &image);

    // Call the general object recognition endpoint.
    Json::Value result;
    result = client.advanced_general(image, aip::null);

    // Serialize the full response before indexing into it below.
    std::string styledResponse = result.toStyledString();
    std::string rootLabel = result["result"][1]["root"].asString();

    // A hand-written input for trying decodeUnicode() without the service:
    // "english\\u5f85\\u96ea\\u521d\\u5e73\\u548c\\u671b\\u65ad\\u79cb\\u9ad8"
    std::cout << decodeUnicode(styledResponse) << std::endl;
    std::cout << UTF8ToGB(rootLabel.c_str()) << std::endl;
}