判断一个文本文件的编码格式。打开 .txt 文件,选择“文件 → 另存为 → 编码”,可选的编码有 ASCII、Unicode、Unicode big endian、UTF-8。
根据文件头(BOM)判断文件属于上面几种编码格式中的哪一种。
用 WinHex 查看到的文件头:
// Leading byte signatures used to recognise how a text file was saved.
unsigned char uniTxt[2]    = { 0xFF, 0xFE };       // header of a "Unicode" file
unsigned char endianTxt[2] = { 0xFE, 0xFF };       // header of a "Unicode big endian" file
unsigned char utf8Txt[3]   = { 0xEF, 0xBB, 0xBF }; // header of a "UTF-8" file
菜鸟求拍砖
/*
 * Guess the encoding of a text file and print the verdict to stdout.
 *
 * The decision is made from the byte-order mark (BOM) at the very start of
 * the file:
 *   EF BB BF -> "UTF-8 text"
 *   FF FE    -> "Unicode text"            (UTF-16 little endian BOM)
 *   FE FF    -> "Unicode big endian text" (UTF-16 big endian BOM)
 * A file without any of these marks is scanned completely: if every byte is
 * in the 7-bit range 0..127 it is reported as "ASCII text" (an empty file
 * counts as ASCII), otherwise as " Unknow".
 *
 * fPath:   path of the file to inspect.
 * returns: true when the file was read and a verdict was printed;
 *          false when fPath is NULL, the file cannot be opened, or a read
 *          error occurs (nothing is printed in those cases).
 */
bool TextEncode(const char *fPath)
{
    static const unsigned char uniTxt[] = {0xFF, 0xFE};        // Unicode file header
    static const unsigned char endianTxt[] = {0xFE, 0xFF};     // Unicode big endian file header
    static const unsigned char utf8Txt[] = {0xEF, 0xBB, 0xBF}; // UTF-8 file header

    // Raw bytes must be unsigned: with a plain (possibly signed) char the
    // "> 127" test can never be true and BOM bytes show up as negative values.
    unsigned char srcBuff[1024];
    unsigned char header[3] = {0, 0, 0};
    size_t headerLen;
    size_t len;
    const char *verdict;
    bool readFailed;
    FILE *pFile;

    if (NULL == fPath)
    {
        return false;
    }

    pFile = fopen(fPath, "rb");
    if (NULL == pFile)
    {
        return false;
    }

    // Capture the first bytes of the file exactly once; headerLen is smaller
    // than 3 for very short files, so only compare what was actually read.
    headerLen = fread(header, 1, sizeof(header), pFile);

    if (headerLen >= sizeof(utf8Txt) && 0 == memcmp(header, utf8Txt, sizeof(utf8Txt)))
    {
        verdict = "UTF-8 text\n";
    }
    else if (headerLen >= sizeof(uniTxt) && 0 == memcmp(header, uniTxt, sizeof(uniTxt)))
    {
        verdict = "Unicode text\n";
    }
    else if (headerLen >= sizeof(endianTxt) && 0 == memcmp(header, endianTxt, sizeof(endianTxt)))
    {
        verdict = "Unicode big endian text\n";
    }
    else
    {
        // No BOM: the file is ASCII only if no byte anywhere exceeds 127.
        bool nonAscii = false;

        for (size_t i = 0; i < headerLen; i++)
        {
            if (header[i] > 127)
            {
                nonAscii = true;
            }
        }

        // Stop reading as soon as one non-ASCII byte settles the answer.
        while (!nonAscii && (len = fread(srcBuff, 1, sizeof(srcBuff), pFile)) > 0)
        {
            for (size_t i = 0; i < len; i++)
            {
                if (srcBuff[i] > 127)
                {
                    nonAscii = true;
                    break;
                }
            }
        }

        verdict = nonAscii ? " Unknow\n" : "ASCII text\n";
    }

    // A short read caused by an I/O error would make the verdict unreliable.
    readFailed = (0 != ferror(pFile));
    fclose(pFile);
    if (readFailed)
    {
        return false;
    }

    printf("%s", verdict);
    return true;
}