/* Return the numeric value of one wide-character unit as an unsigned
 * quantity, discarding any sign extension from a signed wchar_t. */
static unsigned long wide_unit_value(wchar_t unit)
{
    const unsigned long mask = (sizeof(wchar_t) >= 4) ? 0xFFFFFFFFUL : 0xFFFFUL;
    return static_cast<unsigned long>(unit) & mask;
}

/* Read the Unicode code point starting at text[*pos] and advance *pos past
 * it. A high surrogate followed by a low surrogate is combined into one
 * supplementary code point; unpaired surrogates and values above U+10FFFF
 * yield U+FFFD. Must not be called when text[*pos] is the terminator. */
static unsigned long next_wide_code_point(const wchar_t *text, size_t *pos)
{
    const unsigned long unit = wide_unit_value(text[(*pos)++]);

    if (unit >= 0xD800UL && unit <= 0xDBFFUL)
    {
        /* The terminating 0 is never a low surrogate, so peeking at the
         * next unit cannot run past the end of the string. */
        const unsigned long low = wide_unit_value(text[*pos]);
        if (low >= 0xDC00UL && low <= 0xDFFFUL)
        {
            ++*pos;
            return 0x10000UL + ((unit - 0xD800UL) << 10) + (low - 0xDC00UL);
        }
        return 0xFFFDUL;
    }
    if ((unit >= 0xDC00UL && unit <= 0xDFFFUL) || unit > 0x10FFFFUL)
    {
        return 0xFFFDUL;
    }
    return unit;
}

/* Number of bytes needed to encode one code point as UTF-8. */
static size_t utf8_sequence_length(unsigned long code_point)
{
    if (code_point < 0x80UL)
        return 1;
    if (code_point < 0x800UL)
        return 2;
    if (code_point < 0x10000UL)
        return 3;
    return 4;
}

/*
 * Convert a 0-terminated wide-character string to UTF-8.
 *
 * unicode: 0-terminated wide string (UTF-16 when wchar_t is 16 bits wide,
 *          UTF-32 otherwise). May be NULL.
 *
 * Returns a malloc()ed, 0-terminated UTF-8 string that the caller must
 * free(), or NULL if unicode is NULL or the allocation fails.
 *
 * Surrogate pairs and code points above U+FFFF are written as four-byte
 * sequences; invalid input units are replaced by U+FFFD so that the output
 * is always well-formed UTF-8.
 */
unsigned char *make_utf8_string(const wchar_t *unicode)
{
    size_t size = 0;
    size_t index = 0;
    size_t out_index = 0;
    unsigned char *out;

    if (unicode == NULL)
    {
        return NULL;
    }

    /* first calculate the size of the target string */
    while (unicode[index] != 0)
    {
        size += utf8_sequence_length(next_wide_code_point(unicode, &index));
    }

    out = static_cast<unsigned char *>(malloc(size + 1));
    if (out == NULL)
    {
        return NULL;
    }

    /* second pass: the same decoder drives the encoder, so exactly `size`
     * bytes are produced */
    index = 0;
    while (unicode[index] != 0)
    {
        const unsigned long c = next_wide_code_point(unicode, &index);

        if (c < 0x80UL)
        {
            out[out_index++] = static_cast<unsigned char>(c);
        }
        else if (c < 0x800UL)
        {
            out[out_index++] = static_cast<unsigned char>(0xC0 | (c >> 6));
            out[out_index++] = static_cast<unsigned char>(0x80 | (c & 0x3F));
        }
        else if (c < 0x10000UL)
        {
            out[out_index++] = static_cast<unsigned char>(0xE0 | (c >> 12));
            out[out_index++] = static_cast<unsigned char>(0x80 | ((c >> 6) & 0x3F));
            out[out_index++] = static_cast<unsigned char>(0x80 | (c & 0x3F));
        }
        else
        {
            out[out_index++] = static_cast<unsigned char>(0xF0 | (c >> 18));
            out[out_index++] = static_cast<unsigned char>(0x80 | ((c >> 12) & 0x3F));
            out[out_index++] = static_cast<unsigned char>(0x80 | ((c >> 6) & 0x3F));
            out[out_index++] = static_cast<unsigned char>(0x80 | (c & 0x3F));
        }
    }
    out[out_index] = 0x00;
    return out;
}
wchar_t* make_unicode_string(const unsigned char *utf8)
{int size = 0, index = 0, out_index = 0;
wchar_t*out;
unsignedcharc;/*first calculate the size of the target string*/c= utf8[index++];while(c)
{if((c & 0x80) == 0)
{
index+= 0;
}else if((c & 0xe0) == 0xe0)
{
index+= 2;
}else{
index+= 1;
}
size+= 1;
c= utf8[index++];
}out = (wchar_t*)malloc((size + 1) * sizeof(wchar_t));if (out ==NULL)returnNULL;
index= 0;
c= utf8[index++];while(c)
{if((c & 0x80) == 0)
{out[out_index++] =c;
}else if((c & 0xe0) == 0xe0)
{out[out_index] = (c & 0x1F) << 12;
c= utf8[index++];out[out_index] |= (c & 0x3F) << 6;
c= utf8[index++];out[out_index++] |= (c & 0x3F);
}else{out[out_index] = (c & 0x3F) << 6;
c= utf8[index++];out[out_index++] |= (c & 0x3F);
}
c= utf8[index++];
}out[out_index] = 0;return out;
}int StrUtil::utf8_encode(const char *from, char **to)
{
wchar_t*unicode;intwchars, err;
wchars= ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
strlen(from), NULL, 0);if (wchars == 0)
{
fprintf(stderr,"Unicode translation error %d\n", GetLastError());return -1;
}
unicode= (wchar_t*)calloc(wchars + 1, sizeof(unsigned short));if(unicode ==NULL)
{
fprintf(stderr,"Out of memory processing string to UTF8\n");return -1;
}
err= ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
strlen(from), unicode, wchars);if(err !=wchars)
{free(unicode);
fprintf(stderr,"Unicode encode error %d\n", GetLastError());return -1;
}/*On NT-based windows systems, we could use WideCharToMultiByte(), but
* MS doesn't actually have a consistent API across win32.*/
*to = (char *)make_utf8_string(unicode);free(unicode);return 0;
}int StrUtil::utf8_decode(const char *from, char **to)
{
wchar_t*unicode;intchars, err;/*On NT-based windows systems, we could use MultiByteToWideChar(CP_UTF8), but
* MS doesn't actually have a consistent API across win32.*/unicode= make_unicode_string((unsigned char*)from);if(unicode ==NULL)
{
fprintf(stderr,"Out of memory processing string from UTF8 to UNICODE16\n");return -1;
}
chars=::WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,-1, NULL, 0, NULL, NULL);if(chars == 0)
{
fprintf(stderr,"Unicode translation error %d\n", GetLastError());free(unicode);return -1;
}*to = (char *)calloc(chars + 1, sizeof(unsigned char));if(*to ==NULL)
{
fprintf(stderr,"Out of memory processing string to local charset\n");free(unicode);return -1;
}
err=::WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,-1, *to, chars, NULL, NULL);if(err !=chars)
{
fprintf(stderr,"Unicode decode error %d\n", GetLastError());free(unicode);free(*to);*to =NULL;return -1;
}free(unicode);return 0;
}