C/C++ 将 \uXXXX 形式的 Unicode 转义序列转成汉字(例如 \u7528\u6237\u4e0d)

               

参考文章:http://www.qingfengju.com/article.asp?id=245


现在的网站,经常返回下面这样的字符串:
{"error":"\u7528\u6237\u4e0d\u5b58\u5728\u6216\u5bc6\u7801\u9519\u8bef"}
其中的 \u7528 等并不是 UTF-8 编码,而是汉字的 Unicode 转义序列(\u 后面跟 4 位十六进制数,表示一个 UTF-16 码元),如何将其还原成相应的字符呢?

代码如下:

#include <climits>
#include <cwchar>
#include <string>

#ifdef _WIN32
#include <windows.h>
#endif

using std::string;

// Returns the numeric value (0-15) of a hexadecimal digit, or -1 when `ch`
// is not a hexadecimal digit (this includes the terminating NUL).
static int HexDigitValue(char ch)
{
    if (ch >= '0' && ch <= '9') return ch - '0';
    if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
    if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
    return -1;
}

// Converts the four hexadecimal digits at `szCode` (the "XXXX" part of a
// backslash-u escape, i.e. one UTF-16 code unit) into a multibyte string.
//
// On Windows the conversion targets the system ANSI code page (CP_ACP).
// Elsewhere it uses wcrtomb(), i.e. the encoding of the current C locale.
//
// szCode  Pointer to at least four hex digits; may be NULL. Only the first
//         four characters are examined and the buffer is never modified.
// Returns the converted character, or an empty string when `szCode` is NULL,
//         does not start with four hex digits, encodes U+0000, or cannot be
//         represented in the target encoding.
//
// NOTE: each code unit is converted on its own, so a surrogate pair (a
// character outside the Basic Multilingual Plane) is not decoded.
std::string Utf8Code2String(char* szCode)
{
    if (szCode == NULL)
        return std::string();

    // Validate while accumulating. The scan stops at the first non-hex
    // character (NUL included), so a shorter string is never read past its end.
    unsigned int iCode = 0;
    for (int i = 0; i < 4; ++i)
    {
        const int digit = HexDigitValue(szCode[i]);
        if (digit < 0)
            return std::string();
        iCode = (iCode << 4) | static_cast<unsigned int>(digit);
    }

    const wchar_t wch = static_cast<wchar_t>(iCode);

#ifdef _WIN32
    // Zero-initialised and one byte held back, so the result is always
    // NUL-terminated; a failed conversion leaves the buffer empty.
    char szAnsi[8] = {0};
    WideCharToMultiByte(CP_ACP, 0, &wch, 1,
                        szAnsi, static_cast<int>(sizeof(szAnsi) - 1),
                        NULL, NULL);
    return std::string(szAnsi);
#else
    char szMultiByte[MB_LEN_MAX + 1] = {0};
    std::mbstate_t state = std::mbstate_t();
    if (std::wcrtomb(szMultiByte, wch, &state) == static_cast<std::size_t>(-1))
        return std::string();
    return std::string(szMultiByte);
#endif
}

// Replaces every backslash-u escape followed by four hex digits in
// `szUtf8Code` with the character it encodes, for example the JSON text
//   {"error":"\u7528\u6237\u4e0d\u5b58\u5728"}
//
// All other text, including malformed or truncated escapes, is copied to the
// result unchanged. The input buffer is only read, never modified.
//
// szUtf8Code  NUL-terminated input; may be NULL.
// Returns the unescaped string (empty for NULL input).
std::string MyUnEscape(char* szUtf8Code)
{
    std::string strRet;
    if (szUtf8Code == NULL)
        return strRet;

    char* pCur = szUtf8Code;
    while (*pCur != '\0')
    {
        // The && chain short-circuits at the first mismatch (NUL included),
        // so nothing past the terminator is ever read.
        const bool bIsEscape =
            pCur[0] == '\\' && pCur[1] == 'u' &&
            HexDigitValue(pCur[2]) >= 0 && HexDigitValue(pCur[3]) >= 0 &&
            HexDigitValue(pCur[4]) >= 0 && HexDigitValue(pCur[5]) >= 0;

        if (bIsEscape)
        {
            strRet += Utf8Code2String(pCur + 2);
            pCur += 6;  // backslash + 'u' + four hex digits
        }
        else
        {
            strRet += *pCur;
            ++pCur;
        }
    }
    return strRet;
}




// 调用例子
MyUnEscape("test \u7528\u6237\u4e0d\u5b58\u5728\u6216\u5bc6\u7801\u9519\u8bef hahah ok");

-----------------------------------------------------------------------------------------

贴下参考文章的代码:

#include <string.h> #include <iostream>#include <string>using namespace std#include <atlconv.h> enum{    UNICODE_CALC_SIZE = 1,    UNICODE_GET_BYTES = 2}; //将unicode转义字符序列转换为内存中的unicode字符串int unicode_bytes(char* p_unicode_escape_chars,wchar_t *bytes,int flag){    /*    char* p_unicode_escape_chars="pp\\u4fddp\\u5b58\\u6210pp\\u529f0a12";     //通过此函数获知转换后需要的字节数    int n_length=unicode_bytes(p_unicode_escape_chars,NULL,UNICODE_CALC_SIZE);     //再次调用此函数,取得字节序列    wchar_t *bytes=new wchar_t[n_length+sizeof(wchar_t)];    unicode_bytes(p_unicode_escape_chars,bytes,UNICODE_GET_BYTES);    bytes[n_length]=0;     //此时的bytes中是转换后的字节序列    delete[] bytes;    */     int unicode_count=0;    int length=strlen(p_unicode_escape_chars);    for (int char_index=0;char_index<length;char_index++)    {       char unicode_hex[5];       memset(unicode_hex,0,5);        char ascii[2];       memset(ascii,0,2);        if (*(p_unicode_escape_chars+char_index)=='\\')       {           char_index++;           if (char_index<length)           {              if (*(p_unicode_escape_chars+char_index)=='u')              {                  if (flag==UNICODE_GET_BYTES)                  {                     memcpy(unicode_hex,p_unicode_escape_chars+char_index+1,4);                                         //sscanf不可以使用unsigned short类型                     //否则:Run-Time Check Failure #2 - Stack around the variable 'a' was corrupted.                     
unsigned int a=0;                     sscanf_s(unicode_hex,"%04x",&a);                     bytes[unicode_count++]=a;                  }                  else if(flag==UNICODE_CALC_SIZE)                  {                     unicode_count++;                  }                  char_index+=4;              }           }       }       else       {           if (flag==UNICODE_GET_BYTES)           {              memcpy(ascii,p_unicode_escape_chars+char_index,1);              unsigned int a=0;              sscanf_s(ascii,"%c",&a);              bytes[unicode_count++]=a;           }           else if(flag==UNICODE_CALC_SIZE)           {              unicode_count++;           }       }    }     return unicode_count;} string UnEscape(char* p_unicode_escape_chars){    int nBytes=unicode_bytes(p_unicode_escape_chars,NULL,UNICODE_CALC_SIZE);     wchar_t *p_bytes=new wchar_t[nBytes+sizeof(wchar_t)];    unicode_bytes(p_unicode_escape_chars,p_bytes,UNICODE_GET_BYTES);    p_bytes[nBytes]=0;     USES_CONVERSION;    string cs_return=W2A((wchar_t*)p_bytes);     delete[] p_bytes;     return cs_return;} int _tmain(int argc, _TCHAR* argv[]){    // 发送成功条    // \u53d1\u9001\u6210\u529f1\u6761    char* p_unicode_escape_chars="\\u53d1\\u9001\\u6210\\u529f1\\u6761";    cout<<UnEscape(p_unicode_escape_chars)<<endl;    return 0;} 










           
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值