相信一定有不少的程序开发人员时常会遇到字符编码的问题,而这个问题也是非常让人头痛的。因为这些都是潜在的错误,要找出这些错误也得要有这方面的开发经验才行。特别是在处理xml文档时,该问题的出现就更加的频繁了。有一次用java写服务器端程序,用vc写客户端与之交互,交互的协议都是用xml写的,结果在通讯时老是发现数据接收不正确。纳闷!于是用抓取网络数据包工具抓取数据,后来才发现原来是java上xml的头是这样的:<?xml version="1.0" encoding="UTF-8"?>,而vc上默认的是GB2312,所以一遇到汉字数据就不正确了。去网上找资料,这方面的文章好像特别少。针对像这样的问题,下面我介绍一下我自己写的一个转换程序。当然,程序很简单。如果有画蛇添足的地方,还望各位高手一笑了之。
如果您对UTF-8、Unicode、GB2312等还是很陌生的话,请查看http://www.linuxforum.net/books/UTF-8-Unicode.html,我这里就不浪费口舌了。下面介绍一下WinAPI的两个函数:WideCharToMultiByte、MultiByteToWideChar。
函数原型:
01.
int
WideCharToMultiByte(
02.
UINT
CodePage,
// code page
03.
DWORD
dwFlags,
// performance and mapping flags
04.
LPCWSTR
lpWideCharStr,
// wide-character string
05.
int
cchWideChar,
// number of chars in string
06.
LPSTR
lpMultiByteStr,
// buffer for new string
07.
int
cbMultiByte,
// size of buffer
08.
LPCSTR
lpDefaultChar,
// default for unmappable chars
09.
LPBOOL
lpUsedDefaultChar
// set when default char used
10.
);
//将宽字符转换成多个窄字符
11.
12.
int
MultiByteToWideChar(
13.
UINT
CodePage,
// code page
14.
DWORD
dwFlags,
// character-type options
15.
LPCSTR
lpMultiByteStr,
// string to map
16.
int
cbMultiByte,
// number of bytes in string
17.
LPWSTR
lpWideCharStr,
// wide-character buffer
18.
int
cchWideChar
// size of buffer
19.
);
//将多个窄字符转换成宽字符
需要用到的一些函数:
01.
CString CXmlProcess::HexToBin(CString string)
//将16进制数转换成2进制
02.
{
03.
if
( string ==
"0"
)
return
"0000"
;
04.
if
( string ==
"1"
)
return
"0001"
;
05.
if
( string ==
"2"
)
return
"0010"
;
06.
if
( string ==
"3"
)
return
"0011"
;
07.
if
( string ==
"4"
)
return
"0100"
;
08.
if
( string ==
"5"
)
return
"0101"
;
09.
if
( string ==
"6"
)
return
"0110"
;
10.
if
( string ==
"7"
)
return
"0111"
;
11.
if
( string ==
"8"
)
return
"1000"
;
12.
if
( string ==
"9"
)
return
"1001"
;
13.
if
( string ==
"a"
)
return
"1010"
;
14.
if
( string ==
"b"
)
return
"1011"
;
15.
if
( string ==
"c"
)
return
"1100"
;
16.
if
( string ==
"d"
)
return
"1101"
;
17.
if
( string ==
"e"
)
return
"1110"
;
18.
if
( string ==
"f"
)
return
"1111"
;
19.
return
""
;
20.
}
21.
22.
23.
CString CXmlProcess::BinToHex(CString BinString)
//将2进制数转换成16进制
24.
{
25.
if
( BinString ==
"0000"
)
return
"0"
;
26.
if
( BinString ==
"0001"
)
return
"1"
;
27.
if
( BinString ==
"0010"
)
return
"2"
;
28.
if
( BinString ==
"0011"
)
return
"3"
;
29.
if
( BinString ==
"0100"
)
return
"4"
;
30.
if
( BinString ==
"0101"
)
return
"5"
;
31.
if
( BinString ==
"0110"
)
return
"6"
;
32.
if
( BinString ==
"0111"
)
return
"7"
;
33.
if
( BinString ==
"1000"
)
return
"8"
;
34.
if
( BinString ==
"1001"
)
return
"9"
;
35.
if
( BinString ==
"1010"
)
return
"a"
;
36.
if
( BinString ==
"1011"
)
return
"b"
;
37.
if
( BinString ==
"1100"
)
return
"c"
;
38.
if
( BinString ==
"1101"
)
return
"d"
;
39.
if
( BinString ==
"1110"
)
return
"e"
;
40.
if
( BinString ==
"1111"
)
return
"f"
;
41.
return
""
;
42.
}
43.
44.
int
CXmlProcess::BinToInt(CString string)
//2进制字符数据转换成10进制整型
45.
{
46.
int
len =0;
47.
int
tempInt = 0;
48.
int
strInt = 0;
49.
for
(
int
i =0 ;i < string.GetLength() ;i ++)
50.
{
51.
tempInt = 1;
52.
strInt = (
int
)string.GetAt(i)-48;
53.
for
(
int
k =0 ;k < 7-i ; k++)
54.
{
55.
tempInt = 2*tempInt;
56.
}
57.
len += tempInt*strInt;
58.
}
59.
return
len;
60.
}
UTF-8转换成GB2312:先把UTF-8转换成Unicode,然后再把Unicode通过函数WideCharToMultiByte转换成GB2312。
01.
WCHAR
* CXmlProcess::UTF_8ToUnicode(
char
*ustart)
//把UTF-8转换成Unicode
02.
{
03.
char
char_one;
04.
char
char_two;
05.
char
char_three;
06.
int
Hchar;
07.
int
Lchar;
08.
char
uchar[2];
09.
WCHAR
*unicode;
10.
CString string_one;
11.
CString string_two;
12.
CString string_three;
13.
CString combiString;
14.
char_one = *ustart;
15.
char_two = *(ustart+1);
16.
char_three = *(ustart+2);
17.
string_one.Format(
"%x"
,char_one);
18.
string_two.Format(
"%x"
,char_two);
19.
string_three.Format(
"%x"
,char_three);
20.
string_three = string_three.Right(2);
21.
string_two = string_two.Right(2);
22.
string_one = string_one.Right(2);
23.
string_three = HexToBin(string_three.Left(1))+HexToBin(string_three.Right(1));
24.
string_two = HexToBin(string_two.Left(1))+HexToBin(string_two.Right(1));
25.
string_one = HexToBin(string_one.Left(1))+HexToBin(string_one.Right(1));
26.
combiString = string_one +string_two +string_three;
27.
combiString = combiString.Right(20);
28.
combiString.Delete(4,2);
29.
combiString.Delete(10,2);
30.
Hchar = BinToInt(combiString.Left(8));
31.
Lchar = BinToInt(combiString.Right(8));
32.
uchar[1] = (
char
)Hchar;
33.
uchar[0] = (
char
)Lchar;
34.
unicode = (
WCHAR
*)uchar;
35.
return
unicode;
36.
}
37.
38.
char
* CXmlProcess::UnicodeToGB2312(unsigned
short
uData)
//把Unicode 转换成 GB2312
39.
{
40.
char
*buffer ;
41.
buffer =
new
char
[
sizeof
(
WCHAR
)];
42.
WideCharToMultiByte(CP_ACP,NULL,&uData,1,buffer,
sizeof
(
WCHAR
),NULL,NULL);
43.
return
buffer;
44.
}
GB2312转换成UTF-8:先把GB2312通过函数MultiByteToWideChar转换成Unicode,然后再把Unicode拆开后拼装成UTF-8。
01.
WCHAR
* CXmlProcess::Gb2312ToUnicode(
char
*gbBuffer)
//GB2312 转换成 Unicode
02.
{
03.
WCHAR
*uniChar;
04.
uniChar =
new
WCHAR
[1];
05.
::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,gbBuffer,2,uniChar,1);
06.
return
uniChar;
07.
}
08.
char
* CXmlProcess::UnicodeToUTF_8(
WCHAR
*UniChar)
// Unicode 转换成UTF-8
09.
{
10.
char
*buffer;
11.
CString strOne;
12.
CString strTwo;
13.
CString strThree;
14.
CString strFour;
15.
CString strAnd;
16.
buffer =
new
char
[3];
17.
int
hInt,lInt;
18.
hInt = (
int
)((*UniChar)/256);
19.
lInt = (*UniChar)%256;
20.
CString string ;
21.
string.Format(
"%x"
,hInt);
22.
strTwo = HexToBin(string.Right(1));
23.
string = string.Left(string.GetLength() - 1);
24.
strOne = HexToBin(string.Right(1));
25.
string.Format(
"%x"
,lInt);
26.
strFour = HexToBin(string.Right(1));
27.
string = string.Left(string.GetLength() -1);
28.
strThree = HexToBin(string.Right(1));
29.
strAnd = strOne +strTwo + strThree + strFour;
30.
strAnd.Insert(0,
"1110"
);
31.
strAnd.Insert(8,
"10"
);
32.
strAnd.Insert(16,
"10"
);
33.
strOne = strAnd.Left(8);
34.
strAnd = strAnd.Right(16);
35.
strTwo = strAnd.Left(8);
36.
strThree = strAnd.Right(8);
37.
*buffer = (
char
)BinToInt(strOne);
38.
buffer[1] = (
char
)BinToInt(strTwo);
39.
buffer[2] = (
char
)BinToInt(strThree);
40.
return
buffer;
41.
}
例子:将GB2312转换成UTF-8的调用:
01.
char
* CXmlProcess::translateCharToUTF_8(
char
*xmlStream,
int
len)
02.
{
03.
int
newCharLen =0 ;
04.
int
oldCharLen = 0;
05.
int
revCharLen = len;
06.
char
* newCharBuffer;
07.
char
* finalCharBuffer;
08.
char
*buffer ;
09.
CString string;
10.
buffer =
new
char
[
sizeof
(
WCHAR
)];
11.
newCharBuffer =
new
char
[
int
(1.5*revCharLen)];
//设置最大的一个缓冲区
12.
while
(oldCharLen < revCharLen)
13.
{
14.
if
( *(xmlStream + oldCharLen) >= 0)
15.
{
16.
*(newCharBuffer+newCharLen) = *(xmlStream +oldCharLen);
17.
newCharLen ++;
18.
oldCharLen ++;
19.
}
//如果是英文直接复制就可以
20.
else
21.
{
22.
WCHAR
*pbuffer =
this
->Gb2312ToUnicode(xmlStream+oldCharLen);
23.
buffer =
this
->UnicodeToUTF_8(pbuffer);
24.
*(newCharBuffer+newCharLen) = *buffer;
25.
*(newCharBuffer +newCharLen +1) = *(buffer + 1);
26.
*(newCharBuffer +newCharLen +2) = *(buffer + 2);
27.
newCharLen += 3;
28.
oldCharLen += 2;
29.
}
30.
}
31.
newCharBuffer[newCharLen] =
''
\0
''
;
32.
CString string1 ;
33.
string1.Format(
"%s"
,newCharBuffer);
34.
finalCharBuffer =
new
char
[newCharLen+1];
35.
memcpy
(finalCharBuffer,newCharBuffer,newCharLen+1);
36.
return
finalCharBuffer;
37.
}
程序都非常的简单,由于实在太穷。已经吃了两天的方便面。所以现在头昏,程序的详细说明就不写了。程序员到了像我这样的地步也真是少见。工资低没有办法。哎!!!!