GB2312和GBK中每一个汉字由2个字节组成,这2个字节的取值范围(习惯上称"ASCII码",但实际上已超出7位ASCII的0-127范围)分别是:
gb2312:
high8 = 0xa1-->0xfe (161 - 254)
low8 = 0xa1-->0xfe (161 - 254)
gbk:
high8 = 0x81-->0xfe (129 - 254)
low8 = 0x40-->0xfe (64 - 254,其中0x7f除外)
GB2312和GBK的区别:http://zhidao.baidu.com/question/40269499
汉字ASCII对照表的打印方法:
- // Print the GBK double-byte table: every two-byte sequence followed by
- // the decimal values of its lead and trail byte.
- // GBK lead bytes are 0x81..0xFE; trail bytes are 0x40..0xFE except 0x7F.
- for(int i = 0x81; i <= 0xFE; ++i) // lead byte, 129..254
- {
-     for(int j = 0x40; j <= 0xFE; ++j) // trail byte, 64..254
-     {
-         if(j == 0x7F)
-             continue; // 0x7F is not a valid GBK trail byte
-         char pchar[3];
-         pchar[0] = static_cast<char>(i);
-         pchar[1] = static_cast<char>(j);
-         // Must be the NUL escape '\0'. The former '/0' was a multi-character
-         // constant, which left the buffer unterminated.
-         pchar[2] = '\0';
-         cout << pchar << " " << i << "," << j << " ";
-     }
- }
知道一个汉字,输出其ASCII码:
- // Given a Chinese character, print the decimal values of its two bytes.
- // A string literal is const; binding it to a plain char* is ill-formed since C++11.
- const char * pchar = "中";
- // Convert through unsigned char first: where plain char is signed, reading
- // pchar[0] straight into an int yields a negative value for bytes >= 0x80.
- const int i = static_cast<unsigned char>(pchar[0]);
- const int j = static_cast<unsigned char>(pchar[1]);
- cout << pchar << " " << i << "," << j << endl;
- // Prints 214,208 when the literal is stored in GB2312/GBK encoding.
知道汉字的ASCII码,输出汉字:
- // Given the two byte values of a Chinese character, print the character.
- char pchar[3];
- // 214 and 208 do not fit in a signed char; make the narrowing explicit.
- pchar[0] = static_cast<char>(214);
- pchar[1] = static_cast<char>(208);
- // Must be the NUL escape '\0'. The former '/0' was a multi-character
- // constant, which left the string unterminated.
- pchar[2] = '\0';
- cout << pchar << endl;
应用,去除文件中除汉字、字母、数字以外的字符:
- // 去除文件中除汉字、字母、数字以外的字符
- // 文件必须是ANSI编码
- CString sFile = readLocalFile(sname);
- char * pTemp = sFile.GetBuffer();
- CString stoWrite = "";
- for(int i = 0; i < strlen(pTemp); ++i)
- {
- int ch = pTemp[i];
- if(ch < 0)
- {
- // 使用unsigned char 读取可以获取128 - 255 之间的数值
- unsigned char c = pTemp[i];
- int itemp = c;
- // 范围选择170 - 254是为了去掉中文标点及其他字符,范围请参考汉字ASCII对照表
- if(itemp >= 170 && itemp <= 254)
- {
- char * pchar = new char[3];
- pchar[2] = '/0';
- pchar[0] = pTemp[i];
- pchar[1] = pTemp[i+1];
- //cout << pchar;
- stoWrite += pchar;
- delete [] pchar;
- }
- // 此时(ch < 0)两个char表示一个汉字,所以跳过第二个char
- ++i;
- }
- else if( (ch >= 97 && ch <= 122) || (ch >= 48 && ch <= 57))
- {
- //cout << pTemp[i];
- stoWrite += pTemp[i];
- }
- }
- writeLocalFile(stoWrite, rname);