// Encoding detection is based mainly on the first few bytes of the file; UTF-32 and similar encodings are not checked here.
#include <cstring>
#include <fstream>
#include <iostream>
using namespace std;
/// Result codes returned by DetectCodeType().
enum
{
    CODE_TYPE_UNICODE_LE,   ///< File starts with the byte pair FF FE.
    CODE_TYPE_UNICODE_BE,   ///< File starts with the byte pair FE FF.
    CODE_TYPE_UTF8,         ///< File starts with the three bytes EF BB BF.
    CODE_TYPE_NEED_VERIFY,  ///< No recognised signature; caller must inspect the content.
};

/// Classifies a file by the byte-order mark (BOM) at its very beginning.
///
/// Only the first three bytes are examined. UTF-32 signatures are not
/// distinguished: a file starting with FF FE 00 00 is reported as
/// CODE_TYPE_UNICODE_LE.
///
/// @param pFilePath  Path of the file to inspect; may be NULL.
/// @return One of the CODE_TYPE_* values. CODE_TYPE_NEED_VERIFY is returned
///         when no signature matches, when the file is shorter than the
///         signature, when the file cannot be opened, or when pFilePath is
///         NULL.
int DetectCodeType(const char *pFilePath)
{
    if (pFilePath == NULL)
    {
        return CODE_TYPE_NEED_VERIFY;
    }

    // Binary mode: the signature must be compared against the raw bytes on
    // disk, without any text-mode translation by the runtime.
    std::ifstream ifs(pFilePath, std::ios::in | std::ios::binary);
    if (!ifs.is_open())
    {
        // An unreadable file must not be reported as a concrete encoding
        // (falling through with 0 would mean CODE_TYPE_UNICODE_LE).
        return CODE_TYPE_NEED_VERIFY;
    }

    char head[3] = {0};
    ifs.read(head, sizeof(head));
    // Short files deliver fewer bytes; only compare what was really read.
    const std::streamsize bytesRead = ifs.gcount();

    // The UTF-8 signature is three bytes long, so all three must match.
    if (bytesRead >= 3 && std::memcmp(head, "\xEF\xBB\xBF", 3) == 0)
    {
        return CODE_TYPE_UTF8;
    }

    if (bytesRead >= 2)
    {
        if (std::memcmp(head, "\xFE\xFF", 2) == 0)
        {
            return CODE_TYPE_UNICODE_BE;
        }
        if (std::memcmp(head, "\xFF\xFE", 2) == 0)
        {
            return CODE_TYPE_UNICODE_LE;
        }
    }

    return CODE_TYPE_NEED_VERIFY;
}
/// Copies `count` bytes from `src` to `dst`, handling overlapping ranges.
///
/// Note the argument order: source first, destination second.
///
/// @param src    First byte to copy from.
/// @param dst    First byte to copy to.
/// @param count  Number of bytes to copy; zero or negative copies nothing.
/// @return `dst`, or NULL when either pointer is NULL.
char* TestMemMove(const char *src, char *dst, int count)
{
    char *ret = dst;
    if (src == NULL || dst == NULL)
    {
        return NULL;
    }
    // A negative count would never reach zero in the countdown loops below
    // and would run far past both buffers, so treat it as "nothing to copy".
    if (count <= 0)
    {
        return ret;
    }
    if (dst <= src || dst >= (src + count))
    {
        // Destination starts at or before the source, or past its end:
        // a front-to-back copy never overwrites bytes still to be read.
        while (count--)
        {
            *dst++ = *src++;
        }
    }
    else
    {
        // Destination starts inside the source range: copy back-to-front so
        // the tail of the source is read before it is overwritten.
        dst += count - 1;
        src += count - 1;
        while (count--)
        {
            *dst-- = *src--;
        }
    }
    return ret;
}
// Program entry point: runs DetectCodeType() over a fixed list of sample
// files and prints one line per recognised result code.
// The command-line arguments are not used.
int _tmain(int argc, _TCHAR* argv[])
{
    // Hard-coded sample files to classify.
    char samplePaths[][64] =
    {
        "E:\\Temp\\TestUnicodeBE.txt",
        "E:\\Temp\\TestUnicodeLE.txt",
        "E:\\Temp\\TestUTF8.txt",
        "E:\\Temp\\TestANSI.txt"
    };
    // Row count derived from the array itself: total size / size of one row.
    const int sampleCount = sizeof(samplePaths) / sizeof(samplePaths[0]);

    for (int idx = 0; idx < sampleCount; idx++)
    {
        const int detected = DetectCodeType(samplePaths[idx]);

        if (detected == CODE_TYPE_UNICODE_LE)
        {
            std::cout << "This is a Unicode LE file" << std::endl;
        }
        else if (detected == CODE_TYPE_UNICODE_BE)
        {
            std::cout << "This is a Unicode BE file" << std::endl;
        }
        else if (detected == CODE_TYPE_UTF8)
        {
            std::cout << "This is a UTF8 file" << std::endl;
        }
        else if (detected == CODE_TYPE_NEED_VERIFY)
        {
            std::cout << "This file type need verify" << std::endl;
        }
        // Any other value prints nothing.
    }
    return 0;
}