// Determine the encoding of a script file from its leading bytes.
//
// Reads up to sizeof( HXScriptFileCheck ) bytes from hFile at its current
// position (presumably the start of the file -- every reposition below is
// relative to FILE_BEGIN) and classifies the file:
//   - HXSF_CODETYPE_BIN               a complete header was read and IsBinFile accepts it
//   - HXSF_CODETYPE_UTF8              bytes EF BB BF; file pointer is moved to offset 3
//   - HXSF_CODETYPE_UNICODE           bytes FF FE;    file pointer is moved to offset 2
//   - HXSF_CODETYPE_UNICODE_BIGENDIAN bytes FE FF;    file pointer is moved to offset 2
//   - HXSF_CODETYPE_ANSI              anything else;  file pointer is moved to offset 0
// For HXSF_CODETYPE_BIN the file pointer is left where ReadFile put it.
DWORD CHXScriptReal::CheckFileType( HANDLE hFile )
{
    HXScriptFileCheck sfc;
    DWORD dwcbSize = 0;    // stays 0 if the read fails, so no BOM test below can match

    // The binary check needs the whole header. ReadFile never returns more
    // than the requested sizeof( sfc ) bytes, so the comparison must be ">=":
    // a strict ">" could never be true and binary files would go undetected.
    if( ReadFile( hFile, &sfc, sizeof( sfc ), &dwcbSize, NULL ) && ( dwcbSize >= sizeof( sfc )))
    {
        if( IsBinFile( &sfc ))
            return HXSF_CODETYPE_BIN;
    }

    // Text file: look for a byte-order mark and skip past it. Each test is
    // guarded by the number of bytes actually read.
    if( dwcbSize >= 3 && sfc.byBom[ 0 ] == 0xEF && sfc.byBom[ 1 ] == 0xBB && sfc.byBom[ 2 ] == 0xBF )
    {
        SetFilePointer( hFile, 3, NULL, FILE_BEGIN );
        return HXSF_CODETYPE_UTF8;
    }
    else if( dwcbSize >= 2 && sfc.byBom[ 0 ] == 0xFF && sfc.byBom[ 1 ] == 0xFE )
    {
        SetFilePointer( hFile, 2, NULL, FILE_BEGIN );
        return HXSF_CODETYPE_UNICODE;
    }
    else if( dwcbSize >= 2 && sfc.byBom[ 0 ] == 0xFE && sfc.byBom[ 1 ] == 0xFF )
    {
        SetFilePointer( hFile, 2, NULL, FILE_BEGIN );
        return HXSF_CODETYPE_UNICODE_BIGENDIAN;
    }
    else
    {
        // No recognised BOM: rewind so the reader sees the file from its first byte.
        SetFilePointer( hFile, 0, NULL, FILE_BEGIN );
        return HXSF_CODETYPE_ANSI;
    }
}
// Read the next character from m_hFile and return it as a single WCHAR.
//
// The bytes are decoded according to m_dwCodeType:
//   - HXSF_CODETYPE_UNICODE            one WCHAR is read as-is
//   - HXSF_CODETYPE_UNICODE_BIGENDIAN  one WCHAR is read and its two bytes are swapped
//   - HXSF_CODETYPE_UTF8               one multi-byte sequence is read and converted with CP_UTF8
//   - HXSF_CODETYPE_ANSI               one single- or double-byte character is read and
//                                      converted with CP_ACP
// Returns 0 when the read fails or comes up short, when the conversion fails,
// or when m_dwCodeType is none of the values above (presumably callers treat
// 0 as end of input -- confirm against the lexer loop).
//
// NOTE(review): only the first converted WCHAR is returned. A 4-byte UTF-8
// sequence converts to a surrogate pair, so its second half is dropped here;
// fixing that needs state that lives outside this function.
WCHAR CHXLexer::ReadNextCharFromFile()
{
    char btChar;
    WCHAR ch = 0;           // result for every failure path and for unhandled code types
    DWORD dwReaded = 0;
    assert( m_hFile != NULL && m_hFile != INVALID_HANDLE_VALUE );
    switch( m_dwCodeType )
    {
    case HXSF_CODETYPE_UNICODE:
        if(( ! ReadFile( m_hFile, &ch, sizeof( WCHAR ), &dwReaded, NULL )) || ( dwReaded != sizeof( WCHAR )))
            ch = 0;
        break;

    case HXSF_CODETYPE_UNICODE_BIGENDIAN:
        if( ReadFile( m_hFile, &ch, sizeof( WCHAR ), &dwReaded, NULL ) && ( dwReaded == sizeof( WCHAR )))
        {
            // Swap the two bytes to get the little-endian value WCHAR uses.
            WCHAR chTmp = ch;
            ch = chTmp << 8;
            ch |= ( chTmp >> 8 );
        }
        else
            ch = 0;
        break;

    case HXSF_CODETYPE_UTF8:
        if( ReadFile( m_hFile, &btChar, 1, &dwReaded, NULL ) && ( dwReaded == 1 ))
        {
            char szch[ 8 ];
            WCHAR szwch[ 2 ];
            int n, i;

            // The lead byte's high bits give the sequence length (the legacy
            // 5- and 6-byte forms are still recognised). n == 0 marks a byte
            // that is not a valid lead byte; it is converted on its own.
            if(( btChar & 0x80 ) == 0x00 )
                n = 1;
            else if(( btChar & 0xE0 ) == 0xC0 )
                n = 2;
            else if(( btChar & 0xF0 ) == 0xE0 )
                n = 3;
            else if(( btChar & 0xF8 ) == 0xF0 )
                n = 4;
            else if(( btChar & 0xFC ) == 0xF8 )
                n = 5;
            else if(( btChar & 0xFE ) == 0xFC )
                n = 6;
            else
                n = 0;

            szch[ 0 ] = btChar;
            // Collect the remaining bytes; stop early if the file ends mid-sequence.
            for( i = 1; i < n; ++ i )
            {
                if( ReadFile( m_hFile, &btChar, 1, &dwReaded, NULL ) && ( dwReaded == 1 ))
                    szch[ i ] = btChar;
                else
                    break;
            }
            szch[ i ] = 0;

            // i is the number of bytes actually gathered (at least 1).
            if( ::MultiByteToWideChar( CP_UTF8, 0, szch, i, szwch, 2 ) != 0 )
                ch = *szwch;
            else
                ch = 0;
        }
        else
            ch = 0;
        break;

    case HXSF_CODETYPE_ANSI:
        if( ReadFile( m_hFile, &btChar, 1, &dwReaded, NULL ) && ( dwReaded == 1 ))
        {
            char szch[ 4 ];
            WCHAR szwch[ 2 ];
            int n = 1;
            szch[ 0 ] = btChar;
            szch[ 1 ] = 0;

            // Only a real DBCS lead byte of the ANSI code page is followed by
            // a trail byte. Testing the high bit alone would swallow the next
            // byte after every non-ASCII character on single-byte code pages.
            if( ::IsDBCSLeadByte( static_cast< BYTE >( btChar )))
            {
                if( ReadFile( m_hFile, &btChar, 1, &dwReaded, NULL ) && ( dwReaded == 1 ))
                {
                    szch[ 1 ] = btChar;
                    szch[ 2 ] = 0;
                    ++n;
                }
            }

            // Exactly one WCHAR is expected from one ANSI character.
            if( ::MultiByteToWideChar( CP_ACP, 0, szch, n, szwch, 2 ) == 1 )
                ch = *szwch;
            else
                ch = 0;
        }
        else
            ch = 0;
        break;

    default:
        // Unhandled code type: report "no character" rather than an
        // indeterminate value.
        ch = 0;
        break;
    }
    return ch;
}
// Check whether a buffer is structurally well-formed UTF-8.
//
// pszBuffer  bytes to inspect
// ncb        number of bytes in pszBuffer
//
// Returns TRUE when the whole buffer splits into complete 1- to 4-byte
// sequences, each a lead byte followed by the right number of 10xxxxxx
// continuation bytes. Returns FALSE on an invalid lead byte, a bad
// continuation byte, a sequence cut off by the end of the buffer, a negative
// ncb, or a NULL buffer with ncb > 0. An empty buffer (ncb == 0) yields TRUE.
//
// This is a structural check only: overlong encodings and code points beyond
// U+10FFFF are not rejected.
BOOL CHXScriptReal::IsTextUTF8( BYTE * pszBuffer, int ncb )
{
    if( pszBuffer == NULL && ncb > 0 )
        return FALSE;

    int i = 0;
    while( i < ncb )
    {
        const BYTE lead = pszBuffer[ i ];
        int step;

        // Sequence length is encoded in the lead byte's high bits.
        if(( lead & 0x80 ) == 0x00 )
            step = 1;
        else if(( lead & 0xe0 ) == 0xc0 )
            step = 2;
        else if(( lead & 0xf0 ) == 0xe0 )
            step = 3;
        else if(( lead & 0xf8 ) == 0xf0 )
            step = 4;    // code points above U+FFFF
        else
            return FALSE;

        // The sequence must fit entirely inside the buffer.
        if( step > ncb - i )
            return FALSE;

        // Every byte after the lead must be a continuation byte (10xxxxxx).
        for( int k = 1; k < step; ++ k )
        {
            if(( pszBuffer[ i + k ] & 0xc0 ) != 0x80 )
                return FALSE;
        }

        i += step;
    }

    // i == ncb unless ncb was negative, in which case the loop never ran.
    if( i == ncb )
        return TRUE;
    return FALSE;
}