Linux 和 Windows 平台下 Unicode 与 UTF-8 的互相转换

本文介绍了如何用 C++ 实现 UTF-8 字符串与 Unicode(宽字符)字符串互相转换的函数:UTF-8 的编码和解码由手写函数完成,本地代码页与宽字符之间的转换则借助 Windows 的 `MultiByteToWideChar` 和 `WideCharToMultiByte` API。文中展示了字符串编码和解码的关键步骤,包括目标字符串长度的计算和编码格式的转换。
摘要由CSDN通过智能技术生成

/*
 * Reads one Unicode code point from the NUL-terminated wide string `text`,
 * starting at `*pos`, and advances `*pos` past the units consumed.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * one supplementary code point, so UTF-16 input (16-bit wchar_t) and UTF-32
 * input (32-bit wchar_t) are both accepted. Unpaired surrogates and values
 * above U+10FFFF have no UTF-8 representation and are reported as U+FFFD
 * (REPLACEMENT CHARACTER).
 *
 * The caller must ensure text[*pos] is not the terminator.
 */
static unsigned long wide_next_code_point(const wchar_t *text, size_t *pos)
{
    const unsigned long replacement = 0xFFFD;
    /* A negative wchar_t (where wchar_t is signed) becomes a huge value here
     * and is rejected by the range check below. */
    const unsigned long unit = static_cast<unsigned long>(text[(*pos)++]);

    if (unit >= 0xD800 && unit <= 0xDBFF)
    {
        /* Peeking is safe: `unit` was not the terminator, so text[*pos] is
         * at worst the terminator itself. */
        const unsigned long low = static_cast<unsigned long>(text[*pos]);
        if (low >= 0xDC00 && low <= 0xDFFF)
        {
            ++*pos;
            return 0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00);
        }
        return replacement;
    }

    if ((unit >= 0xDC00 && unit <= 0xDFFF) || unit > 0x10FFFF)
        return replacement;

    return unit;
}

/* Number of bytes UTF-8 needs to encode `code_point` (1 to 4). */
static size_t utf8_encoded_length(unsigned long code_point)
{
    if (code_point < 0x80)
        return 1;
    if (code_point < 0x800)
        return 2;
    if (code_point < 0x10000)
        return 3;
    return 4;
}

/*
 * Converts a NUL-terminated wide-character string to UTF-8.
 *
 * unicode: wide string to convert (UTF-16 or UTF-32 depending on the width
 *          of wchar_t); may be NULL.
 *
 * Returns a NUL-terminated UTF-8 string allocated with malloc(), which the
 * caller must release with free(). Returns NULL if `unicode` is NULL or the
 * allocation fails.
 */
unsigned char *make_utf8_string(const wchar_t *unicode)
{
    if (unicode == NULL)
        return NULL;

    /* Pass 1: work out how many bytes the encoded string needs. */
    size_t size = 0;
    size_t index = 0;
    while (unicode[index] != 0)
        size += utf8_encoded_length(wide_next_code_point(unicode, &index));

    unsigned char *out = static_cast<unsigned char *>(malloc(size + 1));
    if (out == NULL)
        return NULL;

    /* Pass 2: encode each code point into the buffer. */
    size_t out_index = 0;
    index = 0;
    while (unicode[index] != 0)
    {
        const unsigned long c = wide_next_code_point(unicode, &index);

        if (c < 0x80)
        {
            out[out_index++] = static_cast<unsigned char>(c);
        }
        else if (c < 0x800)
        {
            out[out_index++] = static_cast<unsigned char>(0xC0 | (c >> 6));
            out[out_index++] = static_cast<unsigned char>(0x80 | (c & 0x3F));
        }
        else if (c < 0x10000)
        {
            out[out_index++] = static_cast<unsigned char>(0xE0 | (c >> 12));
            out[out_index++] = static_cast<unsigned char>(0x80 | ((c >> 6) & 0x3F));
            out[out_index++] = static_cast<unsigned char>(0x80 | (c & 0x3F));
        }
        else
        {
            out[out_index++] = static_cast<unsigned char>(0xF0 | (c >> 18));
            out[out_index++] = static_cast<unsigned char>(0x80 | ((c >> 12) & 0x3F));
            out[out_index++] = static_cast<unsigned char>(0x80 | ((c >> 6) & 0x3F));
            out[out_index++] = static_cast<unsigned char>(0x80 | (c & 0x3F));
        }
    }

    out[out_index] = 0x00;
    return out;
}

/*
 * Decodes one code point from the NUL-terminated UTF-8 string `utf8`,
 * starting at `*pos`, and advances `*pos` past the bytes consumed.
 *
 * Malformed input (a stray continuation byte, an invalid lead byte, a
 * truncated sequence, an overlong encoding, or a value above U+10FFFF)
 * yields U+FFFD (REPLACEMENT CHARACTER). A byte that is not a continuation
 * byte is never consumed as one, so the NUL terminator is never skipped.
 *
 * Three-byte sequences that encode surrogate values are passed through
 * unchanged, so input from encoders that write each UTF-16 unit separately
 * still decodes to the same units.
 *
 * The caller must ensure utf8[*pos] is not the terminator.
 */
static unsigned long utf8_read_code_point(const unsigned char *utf8, size_t *pos)
{
    const unsigned long replacement = 0xFFFD;
    const unsigned char lead = utf8[(*pos)++];

    unsigned long code_point;
    unsigned long smallest; /* smallest value that legitimately needs this length */
    int trailing;           /* number of continuation bytes expected */

    if (lead < 0x80)
    {
        return lead;
    }
    else if ((lead & 0xE0) == 0xC0)
    {
        code_point = lead & 0x1F;
        trailing = 1;
        smallest = 0x80;
    }
    else if ((lead & 0xF0) == 0xE0)
    {
        code_point = lead & 0x0F;
        trailing = 2;
        smallest = 0x800;
    }
    else if ((lead & 0xF8) == 0xF0)
    {
        code_point = lead & 0x07;
        trailing = 3;
        smallest = 0x10000;
    }
    else
    {
        /* 0x80..0xBF (continuation without a lead) or 0xF8..0xFF. */
        return replacement;
    }

    for (; trailing > 0; --trailing)
    {
        const unsigned char next = utf8[*pos];
        if ((next & 0xC0) != 0x80)
            return replacement; /* leave `next` for the caller to examine */
        code_point = (code_point << 6) | (next & 0x3F);
        ++*pos;
    }

    if (code_point < smallest || code_point > 0x10FFFF)
        return replacement;

    return code_point;
}

/*
 * Converts a NUL-terminated UTF-8 string to a wide-character string.
 *
 * utf8: UTF-8 bytes to convert; may be NULL.
 *
 * Code points above U+FFFF are stored as a surrogate pair when wchar_t is
 * narrower than 32 bits and as a single wchar_t otherwise. Malformed input
 * is replaced by U+FFFD instead of being read past.
 *
 * Returns a NUL-terminated wide string allocated with malloc(), which the
 * caller must release with free(). Returns NULL if `utf8` is NULL or the
 * allocation fails.
 */
wchar_t *make_unicode_string(const unsigned char *utf8)
{
    if (utf8 == NULL)
        return NULL;

    const bool needs_surrogates = sizeof(wchar_t) < 4;

    /* Pass 1: count the wide characters the result needs. */
    size_t size = 0;
    size_t index = 0;
    while (utf8[index] != 0)
    {
        const unsigned long c = utf8_read_code_point(utf8, &index);
        size += (needs_surrogates && c > 0xFFFF) ? 2 : 1;
    }

    wchar_t *out = static_cast<wchar_t *>(malloc((size + 1) * sizeof(wchar_t)));
    if (out == NULL)
        return NULL;

    /* Pass 2: decode into the buffer. */
    size_t out_index = 0;
    index = 0;
    while (utf8[index] != 0)
    {
        const unsigned long c = utf8_read_code_point(utf8, &index);

        if (needs_surrogates && c > 0xFFFF)
        {
            const unsigned long offset = c - 0x10000;
            out[out_index++] = static_cast<wchar_t>(0xD800 + (offset >> 10));
            out[out_index++] = static_cast<wchar_t>(0xDC00 + (offset & 0x3FF));
        }
        else
        {
            out[out_index++] = static_cast<wchar_t>(c);
        }
    }

    out[out_index] = 0;
    return out;
}

/*
 * Converts a NUL-terminated string in the system ANSI code page (CP_ACP)
 * to UTF-8.
 *
 * from: string to convert.
 * to:   receives a malloc()-allocated, NUL-terminated UTF-8 string on
 *       success; the caller releases it with free().
 *
 * Returns 0 on success and -1 on failure. A diagnostic is written to stderr
 * for conversion and allocation failures; NULL arguments return -1 silently.
 */
int StrUtil::utf8_encode(const char *from, char **to)
{
    if (from == NULL || to == NULL)
        return -1;

    /* A length of -1 makes the API process the string including its
     * terminator. An explicit length of 0 is rejected by the API, so passing
     * strlen(from) would turn an empty string into an error. */
    const int wchars = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
                                             -1, NULL, 0);
    if (wchars == 0)
    {
        fprintf(stderr, "Unicode translation error %lu",
                static_cast<unsigned long>(GetLastError()));
        return -1;
    }

    wchar_t *unicode = static_cast<wchar_t *>(
        calloc(static_cast<size_t>(wchars) + 1, sizeof(wchar_t)));
    if (unicode == NULL)
    {
        fprintf(stderr, "Out of memory processing string to UTF8");
        return -1;
    }

    const int err = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
                                          -1, unicode, wchars);
    if (err != wchars)
    {
        /* Read the error code before free() has a chance to change it. */
        const unsigned long code = static_cast<unsigned long>(GetLastError());
        free(unicode);
        fprintf(stderr, "Unicode encode error %lu", code);
        return -1;
    }

    /* On NT-based windows systems, we could use WideCharToMultiByte(), but
     * MS doesn't actually have a consistent API across win32. */
    *to = reinterpret_cast<char *>(make_utf8_string(unicode));
    free(unicode);
    if (*to == NULL)
    {
        fprintf(stderr, "Out of memory processing string to UTF8");
        return -1;
    }
    return 0;
}

/*
 * Converts a NUL-terminated UTF-8 string to the code page reported by
 * GetConsoleCP().
 *
 * from: UTF-8 string to convert.
 * to:   receives a calloc()-allocated, NUL-terminated string on success;
 *       the caller releases it with free(). Set to NULL if the final
 *       conversion step fails.
 *
 * Returns 0 on success and -1 on failure. A diagnostic is written to stderr
 * for conversion and allocation failures; NULL arguments return -1 silently.
 *
 * NOTE(review): utf8_encode() converts from CP_ACP while this function
 * converts to GetConsoleCP(); confirm the asymmetry is intended.
 */
int StrUtil::utf8_decode(const char *from, char **to)
{
    if (from == NULL || to == NULL)
        return -1;

    /* On NT-based windows systems, we could use MultiByteToWideChar(CP_UTF8), but
     * MS doesn't actually have a consistent API across win32. */
    wchar_t *unicode =
        make_unicode_string(reinterpret_cast<const unsigned char *>(from));
    if (unicode == NULL)
    {
        fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16");
        return -1;
    }

    /* Length -1: the wide string is NUL-terminated, and the returned count
     * includes the terminator. */
    const int chars = ::WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK,
                                            unicode, -1, NULL, 0, NULL, NULL);
    if (chars == 0)
    {
        fprintf(stderr, "Unicode translation error %lu",
                static_cast<unsigned long>(GetLastError()));
        free(unicode);
        return -1;
    }

    *to = static_cast<char *>(
        calloc(static_cast<size_t>(chars) + 1, sizeof(unsigned char)));
    if (*to == NULL)
    {
        fprintf(stderr, "Out of memory processing string to local charset");
        free(unicode);
        return -1;
    }

    const int err = ::WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK,
                                          unicode, -1, *to, chars, NULL, NULL);
    if (err != chars)
    {
        fprintf(stderr, "Unicode decode error %lu",
                static_cast<unsigned long>(GetLastError()));
        free(unicode);
        free(*to);
        *to = NULL;
        return -1;
    }

    free(unicode);
    return 0;
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值