Windows 和 Linux 的 UTF-8：Linux 平台和 Windows 平台下 Unicode 与 UTF-8 互转

最新推荐文章于 2021-05-25 22:30:40 发布

weixin_39645343

最新推荐文章于 2021-05-25 22:30:40 发布

阅读量126

点赞数

文章标签： Windows和linux的utf8

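本文介绍了如何使用 C++ 实现 Unicode 宽字符串（`wchar_t`）与 UTF-8 字符串之间的相互转换。其中 `make_utf8_string` 和 `make_unicode_string` 手工完成 UTF-8 的编码与解码，关键步骤是先计算目标字符串的长度，再逐字符进行编码格式转换；在 Windows 平台下，`StrUtil::utf8_encode` 和 `StrUtil::utf8_decode` 还借助 `MultiByteToWideChar` 和 `WideCharToMultiByte` API，在本地代码页与 Unicode 之间进行转换。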

摘要由CSDN通过智能技术生成

/*
 * Reads one Unicode code point from a NUL-terminated wide string.
 *
 * `*pos` must index a non-NUL element of `text`; it is advanced past every
 * element that was consumed.  A UTF-16 surrogate pair (high unit followed by
 * a low unit) is combined into a single supplementary code point, so the
 * function works both where wchar_t is 16 bits wide and where it is 32 bits
 * wide.  Unpaired surrogates and values above U+10FFFF cannot be represented
 * in UTF-8 and are reported as U+FFFD (REPLACEMENT CHARACTER).
 */
static unsigned long utf8_take_code_point(const wchar_t *text, size_t *pos)
{
    const unsigned long replacement = 0xFFFDUL;
    const unsigned long unit = static_cast<unsigned long>(text[(*pos)++]);

    if (unit >= 0xD800UL && unit <= 0xDBFFUL) {
        /* The high surrogate was not the terminator, so text[*pos] is at
         * worst the terminating NUL and is always safe to read. */
        const unsigned long low = static_cast<unsigned long>(text[*pos]);
        if (low >= 0xDC00UL && low <= 0xDFFFUL) {
            ++*pos;
            return 0x10000UL + ((unit - 0xD800UL) << 10) + (low - 0xDC00UL);
        }
        return replacement;
    }
    if (unit >= 0xDC00UL && unit <= 0xDFFFUL)
        return replacement;
    /* Also catches negative values where wchar_t is a signed type. */
    if (unit > 0x10FFFFUL)
        return replacement;
    return unit;
}

/*
 * Encodes `cp` (a code point no greater than U+10FFFF) as UTF-8.
 *
 * Returns the number of bytes in the encoding (1 to 4).  When `dst` is not
 * NULL the bytes are also stored at dst[0..n-1]; passing NULL only measures.
 */
static size_t utf8_put_code_point(unsigned long cp, unsigned char *dst)
{
    if (cp < 0x80UL) {
        if (dst != NULL)
            dst[0] = static_cast<unsigned char>(cp);
        return 1;
    }
    if (cp < 0x800UL) {
        if (dst != NULL) {
            dst[0] = static_cast<unsigned char>(0xC0UL | (cp >> 6));
            dst[1] = static_cast<unsigned char>(0x80UL | (cp & 0x3FUL));
        }
        return 2;
    }
    if (cp < 0x10000UL) {
        if (dst != NULL) {
            dst[0] = static_cast<unsigned char>(0xE0UL | (cp >> 12));
            dst[1] = static_cast<unsigned char>(0x80UL | ((cp >> 6) & 0x3FUL));
            dst[2] = static_cast<unsigned char>(0x80UL | (cp & 0x3FUL));
        }
        return 3;
    }
    if (dst != NULL) {
        dst[0] = static_cast<unsigned char>(0xF0UL | (cp >> 18));
        dst[1] = static_cast<unsigned char>(0x80UL | ((cp >> 12) & 0x3FUL));
        dst[2] = static_cast<unsigned char>(0x80UL | ((cp >> 6) & 0x3FUL));
        dst[3] = static_cast<unsigned char>(0x80UL | (cp & 0x3FUL));
    }
    return 4;
}

/*
 * Converts a NUL-terminated wide string to a newly allocated, NUL-terminated
 * UTF-8 string.
 *
 * Parameters:
 *   unicode - wide string to convert (UTF-16 units or full code points,
 *             depending on the platform's wchar_t); may be NULL.
 *
 * Returns a malloc()-allocated buffer that the caller must release with
 * free(), or NULL when `unicode` is NULL or the allocation fails.
 *
 * Code points outside the Basic Multilingual Plane are emitted as 4-byte
 * sequences; unpaired surrogates and out-of-range values become U+FFFD so
 * that the result is always well-formed UTF-8.
 */
unsigned char *make_utf8_string(const wchar_t *unicode)
{
    if (unicode == NULL)
        return NULL;

    /* First pass: measure the encoded length. */
    size_t size = 0;
    for (size_t index = 0; unicode[index] != 0;)
        size += utf8_put_code_point(utf8_take_code_point(unicode, &index), NULL);

    unsigned char *out = static_cast<unsigned char *>(malloc(size + 1));
    if (out == NULL)
        return NULL;

    /* Second pass: encode into the buffer sized above. */
    size_t out_index = 0;
    for (size_t index = 0; unicode[index] != 0;)
        out_index += utf8_put_code_point(utf8_take_code_point(unicode, &index),
                                         out + out_index);

    out[out_index] = 0x00;
    return out;
}

/*
 * Decodes one code point from a NUL-terminated UTF-8 byte string.
 *
 * `*pos` must index a non-NUL byte; it is advanced past the bytes consumed
 * (always at least one, never past the terminating NUL).  Malformed input -
 * a stray continuation byte, an invalid lead byte, a sequence cut short by
 * the end of the string or by a non-continuation byte, an overlong form, or
 * a value above U+10FFFF - yields U+FFFD (REPLACEMENT CHARACTER).
 * Surrogate values encoded as 3-byte sequences are passed through unchanged.
 */
static unsigned long wide_take_utf8_code_point(const unsigned char *utf8, size_t *pos)
{
    const unsigned long replacement = 0xFFFDUL;
    const unsigned char lead = utf8[(*pos)++];

    unsigned long cp = 0;
    unsigned long smallest = 0; /* smallest value that needs this length */
    int trailing = 0;

    if (lead < 0x80) {
        return lead;
    } else if ((lead & 0xE0) == 0xC0) {
        cp = lead & 0x1FUL;
        trailing = 1;
        smallest = 0x80UL;
    } else if ((lead & 0xF0) == 0xE0) {
        cp = lead & 0x0FUL;
        trailing = 2;
        smallest = 0x800UL;
    } else if ((lead & 0xF8) == 0xF0) {
        cp = lead & 0x07UL;
        trailing = 3;
        smallest = 0x10000UL;
    } else {
        return replacement;
    }

    for (int i = 0; i < trailing; ++i) {
        const unsigned char next = utf8[*pos];
        /* A NUL terminator is not a continuation byte, so this check also
         * prevents reading beyond the end of a truncated string.  The
         * offending byte is left for the next call to examine. */
        if ((next & 0xC0) != 0x80)
            return replacement;
        cp = (cp << 6) | (next & 0x3FUL);
        ++*pos;
    }

    if (cp < smallest || cp > 0x10FFFFUL)
        return replacement;
    return cp;
}

/*
 * Number of wchar_t elements needed to store `cp`: two (a surrogate pair)
 * when wchar_t is narrower than 32 bits and `cp` lies outside the Basic
 * Multilingual Plane, otherwise one.
 */
static size_t wide_units_needed(unsigned long cp)
{
    return (sizeof(wchar_t) < 4 && cp > 0xFFFFUL) ? 2 : 1;
}

/*
 * Converts a NUL-terminated UTF-8 string to a newly allocated,
 * NUL-terminated wide string.
 *
 * Parameters:
 *   utf8 - UTF-8 bytes to convert; may be NULL.
 *
 * Returns a malloc()-allocated buffer that the caller must release with
 * free(), or NULL when `utf8` is NULL or the allocation fails.
 *
 * 4-byte sequences are stored as one element where wchar_t is at least
 * 32 bits wide and as a UTF-16 surrogate pair otherwise.  Malformed input
 * never causes a read past the terminator; each bad sequence decodes to
 * U+FFFD.
 */
wchar_t *make_unicode_string(const unsigned char *utf8)
{
    if (utf8 == NULL)
        return NULL;

    /* First pass: count the wide elements required. */
    size_t size = 0;
    for (size_t index = 0; utf8[index] != 0;)
        size += wide_units_needed(wide_take_utf8_code_point(utf8, &index));

    wchar_t *out = static_cast<wchar_t *>(malloc((size + 1) * sizeof(wchar_t)));
    if (out == NULL)
        return NULL;

    /* Second pass: decode into the buffer sized above. */
    size_t out_index = 0;
    for (size_t index = 0; utf8[index] != 0;) {
        const unsigned long cp = wide_take_utf8_code_point(utf8, &index);
        if (wide_units_needed(cp) == 2) {
            const unsigned long offset = cp - 0x10000UL;
            out[out_index++] = static_cast<wchar_t>(0xD800UL + (offset >> 10));
            out[out_index++] = static_cast<wchar_t>(0xDC00UL + (offset & 0x3FFUL));
        } else {
            out[out_index++] = static_cast<wchar_t>(cp);
        }
    }

    out[out_index] = 0;
    return out;
}

/*
 * Converts a string in the system ANSI code page (CP_ACP) to UTF-8.
 *
 * Parameters:
 *   from - NUL-terminated source string.
 *   to   - receives a malloc()-allocated UTF-8 string on success (release
 *          with free()); set to NULL on failure.
 *
 * Returns 0 on success and -1 on failure.  Failures are also reported on
 * stderr.  An empty `from` yields an empty UTF-8 string.
 */
int StrUtil::utf8_encode(const char *from, char **to)
{
    if (from == NULL || to == NULL)
        return -1;
    *to = NULL;

    const int from_len = static_cast<int>(strlen(from));
    if (from_len == 0) {
        /* MultiByteToWideChar() rejects a zero-length input, so an empty
         * string is handled here instead of being reported as an error. */
        *to = static_cast<char *>(calloc(1, sizeof(char)));
        if (*to == NULL) {
            fprintf(stderr, "Out of memory processing string to UTF8");
            return -1;
        }
        return 0;
    }

    const int wchars = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
                                             from_len, NULL, 0);
    if (wchars == 0) {
        fprintf(stderr, "Unicode translation error %lu",
                static_cast<unsigned long>(GetLastError()));
        return -1;
    }

    /* calloc() zero-fills, which supplies the terminating NUL that
     * MultiByteToWideChar() does not write for an explicit-length input. */
    wchar_t *unicode = static_cast<wchar_t *>(calloc(wchars + 1, sizeof(wchar_t)));
    if (unicode == NULL) {
        fprintf(stderr, "Out of memory processing string to UTF8");
        return -1;
    }

    const int err = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
                                          from_len, unicode, wchars);
    if (err != wchars) {
        /* Read the error code before free() has a chance to change it. */
        const unsigned long code = static_cast<unsigned long>(GetLastError());
        free(unicode);
        fprintf(stderr, "Unicode encode error %lu", code);
        return -1;
    }

    /* On NT-based windows systems, we could use WideCharToMultiByte(), but
     * MS doesn't actually have a consistent API across win32. */
    *to = reinterpret_cast<char *>(make_utf8_string(unicode));
    free(unicode);
    if (*to == NULL) {
        fprintf(stderr, "Out of memory processing string to UTF8");
        return -1;
    }
    return 0;
}

/*
 * Converts a UTF-8 string to the console's input code page (GetConsoleCP()).
 *
 * Parameters:
 *   from - NUL-terminated UTF-8 source string.
 *   to   - receives a calloc()-allocated string on success (release with
 *          free()); set to NULL on failure.
 *
 * Returns 0 on success and -1 on failure.  Failures are also reported on
 * stderr.
 */
int StrUtil::utf8_decode(const char *from, char **to)
{
    if (from == NULL || to == NULL)
        return -1;
    *to = NULL;

    /* On NT-based windows systems, we could use MultiByteToWideChar(CP_UTF8), but
     * MS doesn't actually have a consistent API across win32. */
    wchar_t *unicode = make_unicode_string(reinterpret_cast<const unsigned char *>(from));
    if (unicode == NULL) {
        fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16");
        return -1;
    }

    /* Query the code page once so both calls below agree on it. */
    const unsigned int code_page = GetConsoleCP();

    /* Length -1 means "NUL-terminated"; the returned count then includes
     * the terminator. */
    const int chars = ::WideCharToMultiByte(code_page, WC_COMPOSITECHECK, unicode,
                                            -1, NULL, 0, NULL, NULL);
    if (chars == 0) {
        fprintf(stderr, "Unicode translation error %lu",
                static_cast<unsigned long>(GetLastError()));
        free(unicode);
        return -1;
    }

    *to = static_cast<char *>(calloc(chars + 1, sizeof(unsigned char)));
    if (*to == NULL) {
        fprintf(stderr, "Out of memory processing string to local charset");
        free(unicode);
        return -1;
    }

    const int err = ::WideCharToMultiByte(code_page, WC_COMPOSITECHECK, unicode,
                                          -1, *to, chars, NULL, NULL);
    if (err != chars) {
        fprintf(stderr, "Unicode decode error %lu",
                static_cast<unsigned long>(GetLastError()));
        free(unicode);
        free(*to);
        *to = NULL;
        return -1;
    }

    free(unicode);
    return 0;
}

weixin_39645343

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Windows 和 Linux 的 UTF-8：Linux 平台和 Windows 平台下 Unicode 与 UTF-8 互转

unsigned char * make_utf8_string(const wchar_t *unicode){int size = 0, index = 0, out_index = 0;unsignedchar *out;unsignedshortc;/*first calculate the size of the target string*/c= unicode[index++];wh...
复制链接

扫一扫