UTF-8、Unicode(UTF-16)、ANSI 字符串之间的相互转换
作者:飘飘白云
utf8 转换到utf16 :
// Converts a NUL-terminated UTF-8 string to UTF-16.
//
// Returns the converted string, or an empty string when the input is empty
// or MultiByteToWideChar reports a failure (returns 0).
CStringW ConvertUTF8ToUTF16(const CStringA& utf8)
{
    CStringW result;

    // With cbMultiByte == -1 the API processes the terminating NUL as well,
    // so the returned count INCLUDES that terminator.
    const int required = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    if (required <= 0) {
        return result;
    }

    WCHAR* buffer = result.GetBuffer(required);
    const int written = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, buffer, required);

    // ReleaseBuffer expects the string length WITHOUT the terminator; passing
    // the raw count would leave an embedded trailing L'\0' in the result.
    result.ReleaseBuffer(written > 0 ? written - 1 : 0);
    return result;
}
unicode 转换到utf8 :
// Converts a NUL-terminated UTF-16 string to UTF-8.
//
// Returns the converted string, or an empty string when the input is empty
// or WideCharToMultiByte reports a failure (returns 0).
CStringA ConvertUnicodeToUTF8(const CStringW& unicode)
{
    CStringA result;

    // With cchWideChar == -1 the API processes the terminating NUL as well,
    // so the returned byte count INCLUDES that terminator.
    const int required = WideCharToMultiByte(CP_UTF8, 0, unicode, -1, NULL, 0, NULL, NULL);
    if (required <= 0) {
        return result;
    }

    char* buffer = result.GetBuffer(required);
    const int written = WideCharToMultiByte(CP_UTF8, 0, unicode, -1, buffer, required, NULL, NULL);

    // ReleaseBuffer expects the string length WITHOUT the terminator; passing
    // the raw count would leave an embedded trailing '\0' in the result.
    result.ReleaseBuffer(written > 0 ? written - 1 : 0);
    return result;
}
unicode 转换到 ANSI(系统默认代码页 CP_ACP):
// Converts a NUL-terminated UTF-16 string to the system default ANSI code
// page (CP_ACP).
//
// Returns the converted string, or an empty string when the input is empty
// or WideCharToMultiByte reports a failure (returns 0).
CStringA ConvertUnicodeToAnsi(const CStringW& unicode)
{
    CStringA result;

    // With cchWideChar == -1 the API processes the terminating NUL as well,
    // so the returned byte count INCLUDES that terminator.
    const int required = WideCharToMultiByte(CP_ACP, 0, unicode, -1, NULL, 0, NULL, NULL);
    if (required <= 0) {
        return result;
    }

    char* buffer = result.GetBuffer(required);
    const int written = WideCharToMultiByte(CP_ACP, 0, unicode, -1, buffer, required, NULL, NULL);

    // ReleaseBuffer expects the string length WITHOUT the terminator; passing
    // the raw count would leave an embedded trailing '\0' in the result.
    result.ReleaseBuffer(written > 0 ? written - 1 : 0);
    return result;
}
另有一个不调用 WideCharToMultiByte 的 UTF-16 转 UTF-8 版本:
// Converts a wide-character buffer to NUL-terminated UTF-8 without calling
// WideCharToMultiByte.
//
// Parameters:
//   src     - input code units; need not be NUL-terminated.
//   srcLen  - number of wchar_t elements to read from src.
//   dest    - output buffer receiving the UTF-8 bytes plus a terminating NUL.
//   destLen - capacity of dest in bytes, including room for the terminator.
//
// Returns the number of bytes written to dest, not counting the terminator.
// Returns 0 without touching dest when src or dest is null or destLen <= 0.
//
// Behaviour:
//   - A high surrogate followed by a low surrogate is combined into one
//     supplementary code point and emitted as a 4-byte sequence.
//   - Values above 0xFFFF (possible where wchar_t is wider than 16 bits) are
//     emitted directly as 4-byte sequences.
//   - Unpaired surrogates and values above 0x10FFFF are replaced by U+FFFD.
//   - Conversion stops before the first character that would not fit entirely,
//     so the output never ends in a truncated multi-byte sequence.
int ConvertUnicodeToUTF8(const wchar_t* src, int srcLen, unsigned char* dest, int destLen)
{
    if (!src || !dest || destLen <= 0) {
        return 0;
    }

    const unsigned long kReplacementChar = 0xFFFDUL;
    int outputlen = 0; /* bytes written so far, excluding the terminator */

    for (int i = 0; i < srcLen; i++) {
        unsigned long cp = static_cast<unsigned long>(src[i]);
        bool isPair = false;

        if (cp >= 0xD800UL && cp <= 0xDBFFUL) {
            // High surrogate: valid only when immediately followed by a low one.
            const unsigned long low =
                (i + 1 < srcLen) ? static_cast<unsigned long>(src[i + 1]) : 0UL;
            if (low >= 0xDC00UL && low <= 0xDFFFUL) {
                cp = 0x10000UL + ((cp - 0xD800UL) << 10) + (low - 0xDC00UL);
                isPair = true;
            } else {
                cp = kReplacementChar;
            }
        } else if ((cp >= 0xDC00UL && cp <= 0xDFFFUL) || cp > 0x10FFFFUL) {
            // Stray low surrogate, or a value outside the Unicode range.
            cp = kReplacementChar;
        }

        const int needed = (cp < 0x80UL) ? 1
                         : (cp < 0x800UL) ? 2
                         : (cp < 0x10000UL) ? 3
                         : 4;

        // Always keep one byte in reserve for the terminating NUL.
        if (needed > destLen - 1 - outputlen) {
            break;
        }

        switch (needed) {
        case 1: // 0xxxxxxx
            dest[outputlen++] = static_cast<unsigned char>(cp);
            break;
        case 2: // 110xxxxx 10xxxxxx
            dest[outputlen++] = static_cast<unsigned char>(0xC0UL | (cp >> 6));
            dest[outputlen++] = static_cast<unsigned char>(0x80UL | (cp & 0x3FUL));
            break;
        case 3: // 1110xxxx 10xxxxxx 10xxxxxx
            dest[outputlen++] = static_cast<unsigned char>(0xE0UL | (cp >> 12));
            dest[outputlen++] = static_cast<unsigned char>(0x80UL | ((cp >> 6) & 0x3FUL));
            dest[outputlen++] = static_cast<unsigned char>(0x80UL | (cp & 0x3FUL));
            break;
        default: // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            dest[outputlen++] = static_cast<unsigned char>(0xF0UL | (cp >> 18));
            dest[outputlen++] = static_cast<unsigned char>(0x80UL | ((cp >> 12) & 0x3FUL));
            dest[outputlen++] = static_cast<unsigned char>(0x80UL | ((cp >> 6) & 0x3FUL));
            dest[outputlen++] = static_cast<unsigned char>(0x80UL | (cp & 0x3FUL));
            break;
        }

        if (isPair) {
            ++i; // the low surrogate was consumed together with the high one
        }
    }

    dest[outputlen] = '\0';
    return outputlen;
}