MFC GB2312、UTF-8、unicode 之间转换

//GB2312到UTF-8的转换
static int GB2312ToUtf8(const char* gb2312, char* utf8)
{
int len = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
wchar_t* wstr = new wchar_t[len+1];
memset(wstr, 0, len+1);
MultiByteToWideChar(CP_ACP, 0, gb2312, -1, wstr, len);
len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
WideCharToMultiByte(CP_UTF8, 0, wstr, -1, utf8, len, NULL, NULL);
if(wstr) delete[] wstr;
return len;
}

//判断是否是utf8
bool IsTextUTF8(const char* str, long length)
{
	unsigned char chr;

	int nBytes = 0; // UFT8可用1-6个字节编码,ASCII用一个字节
	bool bAllAscii = true; // 如果全部都是ASCII, 说明不是UTF-8

	for (int i=0; i < length; i++)
	{
		chr = *(str + i);

		if ((chr&0x80) != 0) // 判断是否ASCII编码,如果不是,说明有可能是UTF-8, ASCII用7位编码,但用一个字节存,最高位标记为0,o0xxxxxxx
		{
			bAllAscii = false;
		}

		if (nBytes == 0) // 如果不是ASCII码,应该是多字节符,计算字节数
		{
			if (chr >= 0x80)
			{
				if (chr>=0xFC && chr<=0xFD)
					nBytes = 6;
				else if (chr>=0xF8)
					nBytes = 5;
				else if (chr>=0xF0)
					nBytes = 4;
				else if (chr>=0xE0)
					nBytes = 3;
				else if (chr>=0xC0)
					nBytes = 2;
				else
					return false;

				nBytes--;
			}
		}
		else // every char of ascii buffer looks like 10xxxxxx, except the first char
		{
			if( (chr&0xC0) != 0x80 )
			{
				return false;
			}
			nBytes--;
		}
	}

	if (nBytes > 0) // format error
	{
		return false;
	}

	if (bAllAscii) // if all chars are ascii, the buffer is not utf-8
	{
		return false;
	}

	return true;
}


//UTF-8到GB2312的转换
static int Utf8ToGB2312(const char* utf8, char* gb2312)
{
int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
wchar_t* wstr = new wchar_t[len+1];
memset(wstr, 0, len+1);
MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wstr, len);
len = WideCharToMultiByte(CP_ACP, 0, wstr, -1, NULL, 0, NULL, NULL);
WideCharToMultiByte(CP_ACP, 0, wstr, -1, gb2312, len, NULL, NULL);
if(wstr) delete[] wstr;
return len;
}


//GB2312到Unicode的转换
static int GB2312ToUnicode(const char* gb2312, char* unicode)
{
UINT nCodePage = 936; //GB2312
int len = MultiByteToWideChar(nCodePage, 0, gb2312, -1, NULL, 0);
wchar_t* wstr = new wchar_t[len+1];
memset(wstr, 0, len+1);
MultiByteToWideChar(nCodePage, 0, gb2312, -1, wstr, len);
len = len*sizeof(wchar_t);
memcpy(unicode, wstr, len);
if(wstr) delete[] wstr;
return len;
}


//Unicode到GB2312的转换
static int UnicodeToGB2312(const char* unicode, int size, char*gb2312)
{
UINT nCodePage = 936; //GB2312
wchar_t* wstr = new wchar_t[size/2+1];
memcpy(wstr, unicode, size);
int len = WideCharToMultiByte(nCodePage, 0, wstr, -1, NULL, 0, NULL, NULL);
WideCharToMultiByte(nCodePage, 0, wstr, -1, gb2312, len, NULL, NULL);
if(wstr) delete[] wstr;
return len;
}




//UTF-8到Unicode的转换
static int Utf8ToUnicode(const char* utf8, char*unicode)
{
int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
wchar_t* wstr = new wchar_t[len+1];
memset(wstr, 0, len+1);
MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wstr, len);
memcpy(unicode, wstr, len);
if(wstr) delete[] wstr;
return len;
}


//Unicode到UTF-8的转换
static int UnicodeToUtf8(const char* unicode, int size, char* utf8)
{
wchar_t* wstr = new wchar_t[size/2+1];
memcpy(wstr, unicode, size);
int len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
WideCharToMultiByte(CP_UTF8, 0, wstr, -1, utf8, len, NULL, NULL);
if(wstr) delete[] wstr;
return len;
}

 

转自http://blog.csdn.net/seven407/article/details/7712823

 

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值