C/C++文本文件编码格式

 

判断一个文本文件的编码格式。打开.txt文件,选择 文件 -> 另存为 -> 编码,可选的编码有:ASCII、Unicode、Unicode big endian、UTF-8。

根据文件头判断文件可能是上面的几种编码格式

用WinHex查看到的文件头

/* Leading byte sequences (file headers) for each of the listed encodings. */
unsigned char uniTxt[2]    = { 0xFF, 0xFE };		/* "Unicode" file header */
unsigned char endianTxt[2] = { 0xFE, 0xFF };		/* "Unicode big endian" file header */
unsigned char utf8Txt[3]   = { 0xEF, 0xBB, 0xBF };	/* UTF-8 file header */



菜鸟求拍砖

/*
 * Guess the encoding of the file at fPath and print the verdict to stdout.
 *
 * Detection rules, applied in this order:
 *   - file starts with EF BB BF   -> "UTF-8 text"
 *   - file starts with FF FE      -> "Unicode text"
 *   - file starts with FE FF      -> "Unicode big endian text"
 *   - every byte is in 0..127     -> "ASCII text" (an empty file counts as ASCII)
 *   - anything else               -> "	Unknow"
 *
 * Files without a leading marker that contain bytes above 127 (for example
 * UTF-8 saved without a header) are reported as unknown.
 *
 * Returns true when the file was read and a verdict was printed; false when
 * fPath is NULL, the file cannot be opened, or a read error occurs (nothing
 * is printed in those cases).
 */
bool TextEncode(const char *fPath)
{
	static const unsigned char utf8Txt[] = {0xEF, 0xBB, 0xBF};	// UTF-8 file header
	static const unsigned char uniTxt[] = {0xFF, 0xFE};		// Unicode file header
	static const unsigned char endianTxt[] = {0xFE, 0xFF};		// Unicode big endian file header

	// Raw bytes must be unsigned: with a signed char the "> 127" test can never fire.
	unsigned char srcBuff[1024];
	const char *verdict = NULL;
	bool firstChunk = true;
	bool nonAscii = false;
	size_t len;
	FILE *pFile;

	if (NULL == fPath)
	{
		return false;
	}

	pFile = fopen(fPath, "rb");
	if (NULL == pFile)
	{
		return false;
	}

	while ((len = fread(srcBuff, 1, sizeof(srcBuff), pFile)) > 0)
	{
		if (firstChunk)
		{
			// The header only exists at the very start of the file, so it is
			// checked once, and only against bytes that were actually read.
			firstChunk = false;
			if (len >= sizeof(utf8Txt) && 0 == memcmp(srcBuff, utf8Txt, sizeof(utf8Txt)))
			{
				verdict = "UTF-8 text\n";
			}
			else if (len >= sizeof(uniTxt) && 0 == memcmp(srcBuff, uniTxt, sizeof(uniTxt)))
			{
				verdict = "Unicode text\n";
			}
			else if (len >= sizeof(endianTxt) && 0 == memcmp(srcBuff, endianTxt, sizeof(endianTxt)))
			{
				verdict = "Unicode big endian text\n";
			}

			if (NULL != verdict)
			{
				break;	// a header settles the question; no need to scan the rest
			}
		}

		//  ASCII range(0~127)
		for (size_t i = 0; i < len; i++)
		{
			if (srcBuff[i] > 127)
			{
				nonAscii = true;
				break;
			}
		}
		if (nonAscii)
		{
			break;	// one byte outside ASCII is enough to rule ASCII out
		}
	}

	// fread returns 0 for both end-of-file and failure; tell them apart here.
	if (ferror(pFile))
	{
		fclose(pFile);
		return false;
	}
	fclose(pFile);

	if (NULL == verdict)
	{
		verdict = nonAscii ? "	Unknow\n" : "ASCII text\n";
	}
	fputs(verdict, stdout);
	return true;
}


 

阅读更多

没有更多推荐了,返回首页