UTF-8 转换 Unicode

UTF-8 转换 Unicode代码C++:

/*************************************************************************************

                      Unicode       <-   UTF-8 
(U-0000 ~ U-007F) 00000000 0xxxxxxx <- 0xxxxxxx
(U-0080 ~ U-07FF) 00000xxx xxxxxxxx <- 110xxxxx 10xxxxxx  
(U-0800 ~ U-FFFF) xxxxxxxx xxxxxxxx <- 1110xxxx 10xxxxxx 10xxxxxx 

*************************************************************************************/

/*
 * Reports the byte order of the host.
 *
 * Stores the value 1 in an unsigned short and inspects the byte at the
 * lowest address of that object.
 *
 * Returns 1 when that byte holds the value 1 (least significant byte is
 * stored first, i.e. little-endian), otherwise 0.
 */
int IsLittleEndian()
{
	const unsigned short wProbe = 0x0001;
	const unsigned char *pbyFirst = (const unsigned char*)&wProbe;

	if (pbyFirst[0] == 1)
	{
		return 1;
	}

	return 0;
}

/*
 * Decodes a single UTF-8 sequence of 1 to 3 bytes starting at pbySrc and
 * splits the resulting 16-bit value into its high and low byte.
 *
 * Every continuation byte must match 10xxxxxx. Because the NUL terminator
 * never matches that pattern, a sequence cut short by the end of the string
 * is rejected before any byte past the terminator is read.
 *
 * Overlong encodings and surrogate values are passed through unchecked.
 *
 * Returns the number of input bytes consumed (1, 2 or 3), or 0 when the
 * bytes at pbySrc are not an accepted sequence (NUL, stray continuation
 * byte, 4-byte or longer lead byte, bad or missing continuation byte).
 */
static unsigned int DecodeUtf8Unit( const unsigned char *pbySrc, unsigned char *pbyHigh, unsigned char *pbyLow )
{
	const unsigned char byFst = pbySrc[0];

	if (byFst > 0x00 && byFst <= 0x7F)	// one-byte sequence: 0xxxxxxx
	{
		*pbyHigh = 0;
		*pbyLow = byFst;
		return 1;
	}

	if ((byFst & 0xE0) == 0xC0)	// two-byte sequence: 110xxxxx 10xxxxxx
	{
		const unsigned char bySec = pbySrc[1];
		if ((bySec & 0xC0) != 0x80)
		{
			return 0;
		}

		*pbyLow = (unsigned char)((0x3F & bySec) | (byFst << 6));
		*pbyHigh = (unsigned char)(0x07 & (byFst >> 2));
		return 2;
	}

	if ((byFst & 0xF0) == 0xE0)	// three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
	{
		const unsigned char bySec = pbySrc[1];
		if ((bySec & 0xC0) != 0x80)
		{
			return 0;
		}

		// Only read the third byte once the second one is known not to be the terminator.
		const unsigned char byTrd = pbySrc[2];
		if ((byTrd & 0xC0) != 0x80)
		{
			return 0;
		}

		*pbyLow = (unsigned char)((0x3F & byTrd) | (bySec << 6));
		*pbyHigh = (unsigned char)((byFst << 4) | ((bySec >> 2) & 0x0F));
		return 3;
	}

	return 0;	// any other lead byte is not supported
}

/*
 * Converts a NUL-terminated UTF-8 string into a sequence of 16-bit values,
 * two bytes per input character, stored in the byte order reported by
 * IsLittleEndian(). Only UTF-8 sequences of 1 to 3 bytes are accepted.
 *
 * pchStrIn    - NUL-terminated UTF-8 input.
 * pchStrOut   - destination buffer; no terminator is appended to the output.
 * dwOutBufLen - size of pchStrOut in bytes.
 * dwOutLen    - receives the number of output bytes. It is also set to the
 *               required size when -1 is returned because pchStrOut is NULL
 *               or dwOutBufLen is too small, and is 0 for every other failure.
 *
 * Returns 0 on success. Returns -1 when pchStrIn is NULL, the input is too
 * long for its output size to fit in an unsigned int, memory allocation
 * fails, the input contains an unsupported or malformed sequence, pchStrOut
 * is NULL, or the destination buffer is too small. pchStrOut is written
 * only on success.
 */
int Utf82Unicode( const char *pchStrIn, char *pchStrOut, unsigned int dwOutBufLen, unsigned int &dwOutLen )
{
	dwOutLen = 0;

	if (NULL == pchStrIn)
	{
		return -1;
	}

	// Each input byte yields at most two output bytes; refuse inputs whose
	// scratch-buffer size would wrap around in an unsigned int.
	const size_t cchIn = strlen(pchStrIn);
	if (cchIn > ((unsigned int)-1 - 1) / 2)
	{
		return -1;
	}

	// The extra byte keeps the allocation size non-zero for an empty input.
	const unsigned int dwTmpLen = (unsigned int)cchIn * 2 + 1;

	// The result is built in a scratch buffer so that pchStrOut stays
	// untouched on failure.
	unsigned char *pbyTmpBuf = (unsigned char*)malloc(dwTmpLen);
	if (NULL == pbyTmpBuf)
	{
		return -1;
	}

	const unsigned char *pbySrc = (const unsigned char*)pchStrIn;
	unsigned char *pbyDst = pbyTmpBuf;
	unsigned int dwProduced = 0;
	const int bLittleEndian = IsLittleEndian();
	int nRet = 0;

	while (*pbySrc != '\0')
	{
		unsigned char byHigh = 0;
		unsigned char byLow = 0;

		const unsigned int dwUsed = DecodeUtf8Unit(pbySrc, &byHigh, &byLow);
		if (0 == dwUsed)
		{
			nRet = -1;	// unsupported or malformed sequence
			break;
		}
		pbySrc += dwUsed;

		if (bLittleEndian)
		{
			pbyDst[0] = byLow;
			pbyDst[1] = byHigh;
		}
		else
		{
			pbyDst[0] = byHigh;
			pbyDst[1] = byLow;
		}

		pbyDst += 2;
		dwProduced += 2;
	}

	if (0 == nRet)
	{
		dwOutLen = dwProduced;

		if (NULL == pchStrOut || dwProduced > dwOutBufLen)
		{
			nRet = -1;	// the caller's buffer is missing or too small
		}
		else
		{
			memcpy(pchStrOut, pbyTmpBuf, dwProduced);
		}
	}

	free(pbyTmpBuf);
	return nRet;
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值