GB2312和GBK中每一个汉字都由2个字节组成,这2个字节的取值范围分别是:
gb2312:
high8 = 0xa1 -> 0xfe (161 - 254)
low8 = 0xa1 -> 0xfe (161 - 254)
gbk:
high8 = 0x81 -> 0xfe (129 - 254)
low8 = 0x40 -> 0xfe (64 - 254,不含 0x7f)
GB2312和GBK的区别:http://zhidao.baidu.com/question/40269499
汉字ASCII对照表的打印方法:
// 打印ASCII汉字编码表
for(int i = 129; i < 256; ++i) // 129 = 0x81
{
for(int j = 64; j < 256; ++j) // 64 = 0x40
{
char pchar[3];
pchar[0] = i;
pchar[1] = j;
pchar[2] = '/0';
cout << pchar << " " << i << "," << j << " ";
}
}
知道一个汉字,输出其ASCII码:
/*
 * Rebuilds one wide character from two bytes and prints it to stdout.
 *
 * high - first byte of the character as it sits in memory; only the low
 *        8 bits are used. Despite the name, this becomes the low-order
 *        byte of the resulting code unit.
 * low  - second byte in memory; only the low 8 bits are used. It becomes
 *        the high-order byte of the resulting code unit.
 *
 * The two bytes are combined in little-endian order, i.e. the value is
 * high | (low << 8). This is done arithmetically instead of reading the
 * byte buffer through an unsigned short pointer, which would violate the
 * aliasing and alignment rules.
 */
void MakeChinese(int high, int low)
{
    unsigned int first = (unsigned int)high & 0xFFu;
    unsigned int second = (unsigned int)low & 0xFFu;
    wchar_t hanzi = (wchar_t)(first | (second << 8));

    /* %lc prints a single wide character (passed as wint_t);
     * %ls would require a pointer to a wide string instead. */
    printf("拼接汉字->%lc\n", (wint_t)hanzi);
}
int main()
{
setlocale(LC_ALL, "");
wchar_t Chinese[] = L"高";
size_t len = wcslen(Chinese);
cout << len << endl;
wprintf(L"%ls\n", Chinese);
const char *Param = (char *)Chinese;
unsigned char high = (unsigned)Param[0]; // 216
unsigned char low = (unsigned)Param[1]; // 154
MakeChinese((int)high, (int)low);
system("pause");
return 0;
}
char Chinese[] = "高" <--> wchar_t Chinese[] = L"高"
这两种写法下汉字的存储形式以及所使用的编码又有什么区别呢?可以继续深入研究一下……