用于对比的源程序(C++):
// 针对汉字的utf-8 和utf-16对比
#include "stdafx.h"
#include "windows.h"
#include <cstdint>
#include <iostream>
#include <bitset>
using namespace std;
int _tmain(int argc, _TCHAR* argv[])
{
char *pc = "中国";
cout << pc[0] << pc[1] << endl;// 中
wchar_t* lpString = L"hi中国";
wcout.imbue(locale("chs"));
wcout << lpString[3] << endl; // 国
cout << "------------wchar_t to utf-8----------------" << endl;
char uf8[10] = { 0 };
int nLenOfUf8, nReturnlen;
nLenOfUf8 = WideCharToMultiByte(CP_UTF8, 0, lpString, -1, NULL, 0, NULL, NULL);
//Header: Declared in Winnls.h; include Windows.h.
if (!nLenOfUf8)
return false;
nReturnlen = WideCharToMultiByte(CP_UTF8, 0, lpString, -1, uf8, nLenOfUf8, NULL, NULL);
if (!nReturnlen)
{
return false;
}
setlocale(LC_ALL, "");
cout << uf8[0] << uf8[1] << endl; // hi
cout << uf8 << " :" << strlen(uf8) << endl; // hi乱码 8
bitset<16> b(lpString[2]);
cout << "wchar_t: " << b << endl;// wchar_t (对后面的utf8就知道wchar_t就是utf-16)
bitset<8> b1(uf8[2]);//1110 XXXX // 高4位
bitset<8> b2(uf8[3]);//10XX XXXX // 中间6位
bitset<8> b3(uf8[4]);//10XX XXXX // 低6位
cout << "utf8: " << b1 << ' ' << b2 << ' ' << b3 << endl;
cout << "---------wchar_t is utf16----utf8 to utf16-------------------" << endl;
wchar_t ch[10] = { 0 };
bitset<16> b4(((uf8[2] & 0x0f) << 12)//根据规则计算,把'中' '国'由utf8转成utf16
+ ((uf8[3] & 0x3f) << 6) + (uf8[4] & 0x3f) );
cout << "utf-16: " << b4 << endl; // 中
ch[0] = ((uf8[2] & 0x0f) << 12) + ((uf8[3] & 0x3f) << 6) + (uf8[4] & 0x3f)