//文本转换为UNICODE
//pInByte 从文本文件读入的数据
//iInBytesLen 原始数据字节长度
//pWchString 返回转换后的UNICODE数据(以0结束)
//返回转换后UNICODE数据长度
int ConvertText2Unicode(const BYTE *pInByte, const int iInBytesLen, LPWSTR *pWchString)
{
int iWChDataLen = 0;
if(pInByte && iInBytesLen > 0 && pWchString)
{
WCHAR *pWchData = NULL;
if(iInBytesLen > 3
&& pInByte[0] == '\xEF' && pInByte[1] == '\xBB' && pInByte[2] == '\xBF')//UTF8-BOM
{
int icp = CP_UTF8;
LPCSTR pSrcIn = (LPCSTR)(pInByte+3); //skip head
int iSrcLen = iInBytesLen-3;
iWChDataLen = MultiByteToWideChar(icp, 0, pSrcIn, iSrcLen, NULL, 0);
pWchData = new WCHAR[iWChDataLen + 1];
MultiByteToWideChar(icp, 0, pSrcIn, iSrcLen, pWchData, iWChDataLen);
pWchData[iWChDataLen] = 0;
}
else if(iInBytesLen > 2
&& pInByte[0] == '\xFF' && pInByte[1] == '\xFE') //UNICODE
{
LPCWSTR pSrcIn = (LPCWSTR)(pInByte+2); //skip head
iWChDataLen = (iInBytesLen-2)/2;
pWchData = new WCHAR[iWChDataLen + 1];
memcpy(pWchData, pSrcIn, iWChDataLen*sizeof(WCHAR));
pWchData[iWChDataLen] = 0;
}
else if(iInBytesLen > 2
&& pInByte[0] == '\xFE' && pInByte[1] == '\xFF') //UNICODE Big-endian
{
const BYTE *pSrcIn = pInByte+2; //skip head
iWChDataLen = (iInBytesLen-2)/2;
pWchData = new WCHAR[iWChDataLen + 1];
//大小端转换
for(int iSrc=0,iDst=0; iDst<iWChDataLen; iSrc+=2, iDst++)
{
pWchData[iDst] = MAKEWORD(pSrcIn[iSrc+1], pSrcIn[iSrc+0]);
}
pWchData[iWChDataLen] = 0;
}
else if(iInBytesLen > 0) //常规文本
{
int icp = CP_THREAD_ACP; //按当前线程代码页转换
LPCSTR pSrcIn = (LPCSTR)pInByte;
iWChDataLen = MultiByteToWideChar(icp, 0, pSrcIn, iInBytesLen, NULL, 0);
pWchData = new WCHAR[iWChDataLen + 1];
MultiByteToWideChar(icp, 0, pSrcIn, iInBytesLen, pWchData, iWChDataLen);
pWchData[iWChDataLen] = 0;
}
else
{
}
*pWchString = pWchData;
}
return iWChDataLen;
}
// Test snippet: reads the file named by szInFile, converts its content with
// ConvertText2Unicode and writes the resulting text to the debugger output.
{
    BYTE *pInByte = NULL;
    int iInBytesLen = 0;
    WCHAR *pWchData = NULL;
    int iWChDataLen = 0;
    // Read the whole file into memory
    try
    {
        CFile file(szInFile, CFile::modeRead);
        // The length must fit in an int with room for the two padding bytes
        if(file.GetLength() <= 0x7FFFFFFDu)
        {
            const int iFileLen = (int)file.GetLength();
            pInByte = new BYTE[iFileLen + 2];
            memset(pInByte, 0, iFileLen + 2); // leaves two 0 bytes after the data
            // Use the count Read reports rather than the file length;
            // iInBytesLen stays 0 if Read throws, so nothing is converted.
            iInBytesLen = (int)file.Read(pInByte, iFileLen);
        }
        else
        {
            OutputDebugStringW(L"File is too large to load into memory\r\n");
        }
        file.Close();
    }
    catch (CFileException* e)
    {
        e->ReportError();
        e->Delete();
    }
    if(pInByte && iInBytesLen > 0)
    {
        iWChDataLen = ConvertText2Unicode(pInByte, iInBytesLen, &pWchData);
        if(iWChDataLen > 0 && pWchData)
        {
            OutputDebugStringW(pWchData);
            OutputDebugStringW(L"\r\n");
        }
    }
    // delete[] accepts NULL, so no checks are needed
    delete [] pWchData;
    delete [] pInByte;
}