jsoncpp Unicode转中文解决方法

最新推荐文章于 2024-07-30 15:49:25 发布

雨过云舒

最新推荐文章于 2024-07-30 15:49:25 发布

阅读量2.1k

点赞数 3

文章标签： c++

本文链接：https://blog.csdn.net/oqingtianyouyue/article/details/110293709

版权

jsoncpp Unicode转中文解决方法

之前在调用百度API进行图像识别时遇到了中文显示的问题，写个博客记录下解决方法
jsoncpp 的 toStyledString() 函数在输出含中文的 JSON 时不会把 Unicode 转义序列自动还原为中文，而是得到“\u5e73\u548c\u671b”这样的形式。我把网上找到的两个函数整合了一下，就可以实现转码。

decodeUnicode函数

string decodeUnicode(string theString) {

    char aChar;
    int len = theString.length();
    vector<char> outBuffer;
    for (int x = 0; x < len;) {
        aChar = theString[x++];
        if (aChar == '\\') {
            aChar = theString[x++];
            if (aChar == 'u') {
                // Read the xxxx  
                unsigned int value = 0;
                for (int i = 0; i < 4; i++) {
                    aChar = theString[x++];
                    switch (aChar) {
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    case '8':
                    case '9':
                        value = (value << 4) + aChar - '0';
                        break;
                    case 'a':
                    case 'b':
                    case 'c':
                    case 'd':
                    case 'e':
                    case 'f':
                        value = (value << 4) + 10 + aChar - 'a';
                        break;
                    case 'A':
                    case 'B':
                    case 'C':
                    case 'D':
                    case 'E':
                    case 'F':
                        value = (value << 4) + 10 + aChar - 'A';
                        break;
                    default:
                        ;
                    }

                }
                //outBuffer.append((char)value);
                //outBuffer.push_back((char)(value/256));
                //outBuffer.push_back((char)(value % 256));
                string value2 = codePointToUTF8(value);
                outBuffer.insert(outBuffer.end(), value2.begin(), value2.end());
            }
            else {
                if (aChar == 't')
                    aChar = '\t';
                else if (aChar == 'r')
                    aChar = '\r';
                else if (aChar == 'n')
                    aChar = '\n';
                else if (aChar == 'f')
                    aChar = '\f';
                //outBuffer.append(aChar);
                outBuffer.push_back((char)aChar);
            }
        }
        else
            outBuffer.push_back((char)aChar);//outBuffer.append(aChar);
    }
    //return outBuffer.toString();
    string result(outBuffer.begin(), outBuffer.end());
    return result;
}

codePointToUTF8()

/// Converts a Unicode code point to a byte string.
///
/// Despite the name, the result is not always UTF-8:
/// - Code points in U+4E00..U+9FA5 and U+F900..U+FA2D are converted with
///   wcstombs_s() while the C locale is temporarily switched to "chs", so
///   they come out in that locale's multibyte encoding (the article uses this
///   branch to turn the value into displayable Chinese text). The previous
///   locale is restored afterwards. If that conversion fails, the code point
///   is encoded as 3-byte UTF-8 instead of being dropped.
/// - Every other code point up to U+10FFFF is encoded as UTF-8
///   (1 to 4 bytes).
///
/// @param cp the code point to encode.
/// @return the encoded bytes; empty when cp is greater than 0x10FFFF.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
    JSONCPP_STRING result;

    // based on description from http://en.wikipedia.org/wiki/UTF-8

    if (cp <= 0x7f) {
        result.resize(1);
        result[0] = static_cast<char>(cp);
    }
    else if (cp <= 0x7FF) {
        result.resize(2);
        result[1] = static_cast<char>(0x80 | (0x3f & cp));
        result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
    }
    else if (cp <= 0xFFFF) {
        const bool isCjk = (cp >= 0x4E00 && cp <= 0x9FA5) || (cp >= 0xF900 && cp <= 0xFA2D);
        bool converted = false;

        if (isCjk) {
            // Convert the single wide character to the "chs" locale's
            // multibyte form.
            const wchar_t src[2] = { static_cast<wchar_t>(cp), L'\0' };
            char dest[5] = { 0 };

            // setlocale() may return NULL; never build a std::string from it.
            const char* previous = setlocale(LC_ALL, NULL);
            const std::string savedLocale(previous != NULL ? previous : "");

            setlocale(LC_ALL, "chs");
            converted = wcstombs_s(NULL, dest, 5, src, 2) == 0 && dest[0] != '\0';
            if (!savedLocale.empty())
                setlocale(LC_ALL, savedLocale.c_str());

            if (converted)
                result = dest;
        }

        if (!converted) {
            // Plain 3-byte UTF-8; also the fallback when the locale
            // conversion above did not produce any output.
            result.resize(3);
            result[2] = static_cast<char>(0x80 | (0x3f & cp));
            result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
            result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
        }
    }
    else if (cp <= 0x10FFFF) {
        result.resize(4);
        result[3] = static_cast<char>(0x80 | (0x3f & cp));
        result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
        result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
        result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
    }

    return result;
}

decodeUnicode() 函数把 \u5e73 中 \u 后面的四位十六进制数字解析成整数码点，存放在 value（unsigned int 类型）中，然后调用 codePointToUTF8()；codePointToUTF8() 函数再把 value 编码成 string 类型返回。

另外，若直接打印 result["result"][1]["root"].asString() 的返回值，中文会显示为乱码（看起来像繁体字）。这是因为 jsoncpp 返回的字符串采用 UTF-8 编码，需要先用 UTF8ToGB() 函数转为 GBK 编码再打印。

参考博客
https://blog.csdn.net/xiaohu_2012/article/details/14454299.
//unicodetoChinese
https://blog.csdn.net/YAOJINGKAO/article/details/52222552?utm_source=blogxgwz0&utm_medium=distribute.pc_relevant.none-task-blog-title-3&spm=1001.2101.3001.4242.
//codePointToUTF8()
https://blog.csdn.net/xiaohu_2012/article/details/14454299
// UTF8ToGB()
//UnicodeToChinese()忘记出处在哪儿了，找到了补上 -_-ll

小白的第一篇博客，如有不足望指正^_^
完整代码如下：

#include <iostream>
#include <json/json.h>
#include <string>
#include <atlstr.h>
#include "image_classify.h"
#include <vector>
using namespace std;

/// Converts a Unicode code point to a byte string.
///
/// Despite the name, the result is not always UTF-8:
/// - Code points in U+4E00..U+9FA5 and U+F900..U+FA2D are converted with
///   wcstombs_s() while the C locale is temporarily switched to "chs", so
///   they come out in that locale's multibyte encoding (the article uses this
///   branch to turn the value into displayable Chinese text). The previous
///   locale is restored afterwards. If that conversion fails, the code point
///   is encoded as 3-byte UTF-8 instead of being dropped.
/// - Every other code point up to U+10FFFF is encoded as UTF-8
///   (1 to 4 bytes).
///
/// @param cp the code point to encode.
/// @return the encoded bytes; empty when cp is greater than 0x10FFFF.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
    JSONCPP_STRING result;

    // based on description from http://en.wikipedia.org/wiki/UTF-8

    if (cp <= 0x7f) {
        result.resize(1);
        result[0] = static_cast<char>(cp);
    }
    else if (cp <= 0x7FF) {
        result.resize(2);
        result[1] = static_cast<char>(0x80 | (0x3f & cp));
        result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
    }
    else if (cp <= 0xFFFF) {
        const bool isCjk = (cp >= 0x4E00 && cp <= 0x9FA5) || (cp >= 0xF900 && cp <= 0xFA2D);
        bool converted = false;

        if (isCjk) {
            // Convert the single wide character to the "chs" locale's
            // multibyte form.
            const wchar_t src[2] = { static_cast<wchar_t>(cp), L'\0' };
            char dest[5] = { 0 };

            // setlocale() may return NULL; never build a std::string from it.
            const char* previous = setlocale(LC_ALL, NULL);
            const std::string savedLocale(previous != NULL ? previous : "");

            setlocale(LC_ALL, "chs");
            converted = wcstombs_s(NULL, dest, 5, src, 2) == 0 && dest[0] != '\0';
            if (!savedLocale.empty())
                setlocale(LC_ALL, savedLocale.c_str());

            if (converted)
                result = dest;
        }

        if (!converted) {
            // Plain 3-byte UTF-8; also the fallback when the locale
            // conversion above did not produce any output.
            result.resize(3);
            result[2] = static_cast<char>(0x80 | (0x3f & cp));
            result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
            result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
        }
    }
    else if (cp <= 0x10FFFF) {
        result.resize(4);
        result[3] = static_cast<char>(0x80 | (0x3f & cp));
        result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
        result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
        result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
    }

    return result;
}

string decodeUnicode(string theString) {

    char aChar;
    int len = theString.length();
    vector<char> outBuffer;
    for (int x = 0; x < len;) {
        aChar = theString[x++];
        if (aChar == '\\') {
            aChar = theString[x++];
            if (aChar == 'u') {
                // Read the xxxx  
                unsigned int value = 0;
                for (int i = 0; i < 4; i++) {
                    aChar = theString[x++];
                    switch (aChar) {
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    case '8':
                    case '9':
                        value = (value << 4) + aChar - '0';
                        break;
                    case 'a':
                    case 'b':
                    case 'c':
                    case 'd':
                    case 'e':
                    case 'f':
                        value = (value << 4) + 10 + aChar - 'a';
                        break;
                    case 'A':
                    case 'B':
                    case 'C':
                    case 'D':
                    case 'E':
                    case 'F':
                        value = (value << 4) + 10 + aChar - 'A';
                        break;
                    default:
                        ;
                    }

                }
                //outBuffer.append((char)value);
                //outBuffer.push_back((char)(value/256));
                //outBuffer.push_back((char)(value % 256));
                string value2 = codePointToUTF8(value);
                outBuffer.insert(outBuffer.end(), value2.begin(), value2.end());
            }
            else {
                if (aChar == 't')
                    aChar = '\t';
                else if (aChar == 'r')
                    aChar = '\r';
                else if (aChar == 'n')
                    aChar = '\n';
                else if (aChar == 'f')
                    aChar = '\f';
                //outBuffer.append(aChar);
                outBuffer.push_back((char)aChar);
            }
        }
        else
            outBuffer.push_back((char)aChar);//outBuffer.append(aChar);
    }
    //return outBuffer.toString();
    string result(outBuffer.begin(), outBuffer.end());
    return result;
}
//UnicodeToChinese()这个函数代码里没有用到，可将'\u2132'转为对于的CString中文
bool UnicodeToChinese(string str, CString& cstr)
{
    int i = 0;
    int j = 0;
    int len = 0;

    len = str.length();
    if (len <= 0)
    {
        return false;
    }

    int nValue = 0;
    WCHAR* pWchar;
    wchar_t* szHex;
    char strchar[6] = { '0','x','\0' };

    for (i = 0; i < len; i++)
    {
        if (str[i] == 'u')
        {
            for (j = 2; j < 6; j++)
            {
                i++;
                strchar[j] = str[i];
            }

            USES_CONVERSION;
            szHex = A2W(strchar);

            StrToIntExW(szHex, STIF_SUPPORT_HEX, &nValue);
            pWchar = (WCHAR*)&nValue;

            cstr = cstr + pWchar;
        }
    }
    return true;
}

string UTF8ToGB(const char* str)
{
    string result;
    WCHAR* strSrc;
    LPSTR szRes;

    int i = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
    strSrc = new WCHAR[i + 1];
    MultiByteToWideChar(CP_UTF8, 0, str, -1, strSrc, i);

    i = WideCharToMultiByte(CP_ACP, 0, strSrc, -1, NULL, 0, NULL, NULL);
    szRes = new CHAR[i + 1];
    WideCharToMultiByte(CP_ACP, 0, strSrc, -1, szRes, i, NULL, NULL);

    result = szRes;
    delete[]strSrc;
    delete[]szRes;
    return result;
}

int main() {
    // Placeholder credentials used to construct the image-classification client.
    std::string app_id = "xxxxxxx";
    std::string api_key = "xxxxxxxxx";
    std::string secret_key = "xxxxxxxxxxxxxxx";
    aip::Imageclassify client(app_id, api_key, secret_key);

    // Read the image file whose contents are sent for recognition.
    std::string image;
    aip::get_file_content("E:/智子.jpg", &image);

    // Call general object recognition.
    Json::Value result = client.advanced_general(image, aip::null);

    // Whole response as styled text, plus the "root" field of the entry at
    // index 1 of "result".
    const std::string styledText = result.toStyledString();
    const std::string rootText = result["result"][1]["root"].asString();

    // Sample input for decodeUnicode():
    //   "english\\u5f85\\u96ea\\u521d\\u5e73\\u548c\\u671b\\u65ad\\u79cb\\u9ad8"
    std::cout << decodeUnicode(styledText) << std::endl;

    // asString() output is converted with UTF8ToGB() before printing.
    std::cout << UTF8ToGB(rootText.c_str()) << std::endl;

    return 0;
}