UTF-8与GB2312之间的互换

相信一定有不少的程序开发人员时常会遇到字符编码的问题,而这个问题也是非常让人头痛的。因为这些都是潜在的错误,要找出这些错误也得要有这方面的开发经验才行。特别是在处理xml文档时,该问题的出现就更加频繁了。有一次用java写服务器端程序,用vc写客户端与之交互,交互的协议都是用xml写的。结果在通讯时老是发现数据接收不正确。纳闷!于是用抓取网络数据包的工具抓取数据,后来才发现原来java上xml的头是这样的:<?xml version="1.0" encoding="UTF-8"?>,而vc上默认的是GB2312,所以一遇到汉字数据就不正确了。去网上找资料,这方面的文章好像特别少。针对像这样的问题,下面我介绍一下我自己写的一个转换程序。当然,程序很简单。如果有画蛇添足的地方,还望各位高手一笑了之。

如果您对UTF-8、Unicode、GB2312等还是很陌生的话,请查看http://www.linuxforum.net/books/UTF-8-Unicode.html,我这里就不浪费口舌了。下面介绍一下WinAPI的两个函数:WideCharToMultiByte、MultiByteToWideChar。

函数原型:

01. int WideCharToMultiByte(
02. UINT CodePage, // code page
03. DWORD dwFlags, // performance and mapping flags
04. LPCWSTR lpWideCharStr, // wide-character string
05. int cchWideChar, // number of chars in string
06. LPSTR lpMultiByteStr, // buffer for new string
07. int cbMultiByte, // size of buffer
08. LPCSTR lpDefaultChar, // default for unmappable chars
09. LPBOOL lpUsedDefaultChar // set when default char used
10. ); // converts a wide-character string into a multibyte (narrow) character string
11.  
12. int MultiByteToWideChar(
13. UINT CodePage, // code page
14. DWORD dwFlags, // character-type options
15. LPCSTR lpMultiByteStr, // string to map
16. int cbMultiByte, // number of bytes in string
17. LPWSTR lpWideCharStr, // wide-character buffer
18. int cchWideChar // size of buffer
19. ); // converts a multibyte (narrow) character string into a wide-character string

需要用到的一些函数:

01. CString CXmlProcess::HexToBin(CString string)//将16进制数转换成2进制
02. {
03. if( string == "0"return "0000";
04. if( string == "1"return "0001";
05. if( string == "2"return "0010";
06. if( string == "3"return "0011";
07. if( string == "4"return "0100";
08. if( string == "5"return "0101";
09. if( string == "6"return "0110";
10. if( string == "7"return "0111";
11. if( string == "8"return "1000";
12. if( string == "9"return "1001";
13. if( string == "a"return "1010";
14. if( string == "b"return "1011";
15. if( string == "c"return "1100";
16. if( string == "d"return "1101";
17. if( string == "e"return "1110";
18. if( string == "f"return "1111";
19. return "";
20. }
21.  
22.  
23. CString CXmlProcess::BinToHex(CString BinString)//将2进制数转换成16进制
24. {
25. if( BinString == "0000"return "0";
26. if( BinString == "0001"return "1";
27. if( BinString == "0010"return "2";
28. if( BinString == "0011"return "3";
29. if( BinString == "0100"return "4";
30. if( BinString == "0101"return "5";
31. if( BinString == "0110"return "6";
32. if( BinString == "0111"return "7";
33. if( BinString == "1000"return "8";
34. if( BinString == "1001"return "9";
35. if( BinString == "1010"return "a";
36. if( BinString == "1011"return "b";
37. if( BinString == "1100"return "c";
38. if( BinString == "1101"return "d";
39. if( BinString == "1110"return "e";
40. if( BinString == "1111"return "f";
41. return "";
42. }
43.  
44. int CXmlProcess::BinToInt(CString string)//2进制字符数据转换成10进制整型
45. {
46. int len =0;
47. int tempInt = 0;
48. int strInt = 0;
49. for(int i =0 ;i < string.GetLength() ;i ++)
50. {
51. tempInt = 1;
52. strInt = (int)string.GetAt(i)-48;
53. for(int k =0 ;k < 7-i ; k++)
54. {
55. tempInt = 2*tempInt;
56. }
57. len += tempInt*strInt;
58. }
59. return len;
60. }

UTF-8转换成GB2312:先把UTF-8转换成Unicode,然后再通过函数WideCharToMultiByte把Unicode转换成GB2312。

01. WCHAR* CXmlProcess::UTF_8ToUnicode(char *ustart)  //把UTF-8转换成Unicode
02. {
03. char char_one;
04. char char_two;
05. char char_three;
06. int Hchar;
07. int Lchar;
08. char uchar[2];
09. WCHAR *unicode;
10. CString string_one;
11. CString string_two;
12. CString string_three;
13. CString combiString;
14. char_one = *ustart;
15. char_two = *(ustart+1);
16. char_three = *(ustart+2);
17. string_one.Format("%x",char_one);
18. string_two.Format("%x",char_two);
19. string_three.Format("%x",char_three);
20. string_three = string_three.Right(2);
21. string_two = string_two.Right(2);
22. string_one = string_one.Right(2);
23. string_three = HexToBin(string_three.Left(1))+HexToBin(string_three.Right(1));
24. string_two = HexToBin(string_two.Left(1))+HexToBin(string_two.Right(1));
25. string_one = HexToBin(string_one.Left(1))+HexToBin(string_one.Right(1));
26. combiString = string_one +string_two +string_three;
27. combiString = combiString.Right(20);
28. combiString.Delete(4,2);
29. combiString.Delete(10,2);
30. Hchar = BinToInt(combiString.Left(8));
31. Lchar = BinToInt(combiString.Right(8));
32. uchar[1] = (char)Hchar;
33. uchar[0] = (char)Lchar;
34. unicode = (WCHAR *)uchar;
35. return unicode;
36. }
37.  
38. char * CXmlProcess::UnicodeToGB2312(unsigned short uData)  //把Unicode 转换成 GB2312
39. {
40. char *buffer ;
41. buffer = new char[sizeof(WCHAR)];
42. WideCharToMultiByte(CP_ACP,NULL,&uData,1,buffer,sizeof(WCHAR),NULL,NULL);
43. return buffer;
44. }

GB2312转换成UTF-8:先通过函数MultiByteToWideChar把GB2312转换成Unicode,然后再把Unicode的二进制位拆开,拼装成UTF-8。

01. WCHAR * CXmlProcess::Gb2312ToUnicode(char *gbBuffer)  //GB2312 转换成 Unicode
02. {
03. WCHAR *uniChar;
04. uniChar = new WCHAR[1];
05. ::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,gbBuffer,2,uniChar,1);
06. return uniChar;
07. }
08. char * CXmlProcess::UnicodeToUTF_8(WCHAR *UniChar) // Unicode 转换成UTF-8
09. {
10. char *buffer;
11. CString strOne;
12. CString strTwo;
13. CString strThree;
14. CString strFour;
15. CString strAnd;
16. buffer = new char[3];
17. int hInt,lInt;
18. hInt = (int)((*UniChar)/256);
19. lInt = (*UniChar)%256;
20. CString string ;
21. string.Format("%x",hInt);
22. strTwo = HexToBin(string.Right(1));
23. string = string.Left(string.GetLength() - 1);
24. strOne = HexToBin(string.Right(1));
25. string.Format("%x",lInt);
26. strFour = HexToBin(string.Right(1));
27. string = string.Left(string.GetLength() -1);
28. strThree = HexToBin(string.Right(1));
29. strAnd = strOne +strTwo + strThree + strFour;
30. strAnd.Insert(0,"1110");
31. strAnd.Insert(8,"10");
32. strAnd.Insert(16,"10");
33. strOne = strAnd.Left(8);
34. strAnd = strAnd.Right(16);
35. strTwo = strAnd.Left(8);
36. strThree = strAnd.Right(8);
37. *buffer = (char)BinToInt(strOne);
38. buffer[1] = (char)BinToInt(strTwo);
39. buffer[2] = (char)BinToInt(strThree);
40. return buffer;
41. }

例子:将GB2312转换成UTF-8的调用:

01. char * CXmlProcess::translateCharToUTF_8(char *xmlStream, int len)
02. {
03. int newCharLen =0 ;
04. int oldCharLen = 0;
05. int revCharLen = len;
06. char* newCharBuffer;
07. char* finalCharBuffer;
08. char *buffer ;
09. CString string;
10. buffer  = new char[sizeof(WCHAR)];
11. newCharBuffer = new char[int(1.5*revCharLen)];//设置最大的一个缓冲区
12. while(oldCharLen < revCharLen)
13. {
14. if( *(xmlStream + oldCharLen) >= 0)
15. {
16. *(newCharBuffer+newCharLen) = *(xmlStream +oldCharLen);
17. newCharLen ++;
18. oldCharLen ++;
19. }//如果是英文直接复制就可以
20. else
21. {
22. WCHAR *pbuffer = this->Gb2312ToUnicode(xmlStream+oldCharLen);
23. buffer = this->UnicodeToUTF_8(pbuffer);
24. *(newCharBuffer+newCharLen) = *buffer;
25. *(newCharBuffer +newCharLen +1) = *(buffer + 1);
26. *(newCharBuffer +newCharLen +2) = *(buffer + 2);
27. newCharLen += 3;
28. oldCharLen += 2;
29. }
30. }
31. newCharBuffer[newCharLen] = ''\0'';
32. CString string1 ;
33. string1.Format("%s",newCharBuffer);
34. finalCharBuffer = new char[newCharLen+1];
35. memcpy(finalCharBuffer,newCharBuffer,newCharLen+1);
36. return finalCharBuffer;
37. }

程序都非常的简单,由于实在太穷。已经吃了两天的方便面。所以现在头昏,程序的详细说明就不写了。程序员到了像我这样的地步也真是少见。工资低没有办法。哎!!!!

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值