Unicode 转换 UTF-8

Unicode 转换 UTF-8 代码c++:

/*************************************************************************************

                      Unicode       ->   UTF-8 
(U-0000 ~ U-007F) 00000000 0xxxxxxx -> 0xxxxxxx
(U-0080 ~ U-07FF) 00000xxx xxxxxxxx -> 110xxxxx 10xxxxxx  
(U-0800 ~ U-FFFF) xxxxxxxx xxxxxxxx -> 1110xxxx 10xxxxxx 10xxxxxx 

*************************************************************************************/

/*
 * Reports the byte order of the machine this code runs on.
 *
 * Stores the value 1 in a 16-bit integer and inspects the byte at its
 * lowest address: if that byte holds the 1, the least significant byte
 * comes first in memory.
 *
 * Returns 1 on a little-endian host, 0 otherwise.
 */
int IsLittleEndian()
{
	const unsigned short wProbe = 0x0001;
	const unsigned char *pbyFirst = (const unsigned char *)&wProbe;

	if (1 == pbyFirst[0])
	{
		return 1;
	}

	return 0;
}

/*
 * Combines a high byte and a low byte into one 16-bit code unit.
 *
 * byHigh is shifted into bits 8..15 and OR-ed with byLow; the result is
 * truncated to 16 bits. Both parameters are declared as unsigned short,
 * so any bits of byLow above bit 7 are OR-ed into the result as-is, and
 * bits of byHigh above bit 7 are lost by the truncation.
 */
unsigned short makeUnicode(unsigned short byHigh, unsigned short byLow)
{
	const unsigned int dwUpper = (unsigned int)byHigh << 8;
	const unsigned int dwJoined = dwUpper | (unsigned int)byLow;

	return (unsigned short)dwJoined;
}

/*
 * Reads one 16-bit code unit stored at pchSrc in the host's byte order.
 * The bytes are copied rather than read through a casted pointer, so the
 * source does not have to be 2-byte aligned. Assumes unsigned short is
 * 16 bits wide, matching the 2-byte stride used by the converter.
 */
static unsigned short Utf16ReadUnit(const char *pchSrc)
{
	unsigned short wUnit = 0;
	memcpy(&wUnit, pchSrc, 2);
	return wUnit;
}

/*
 * Encodes one code point (<= 0x10FFFF) as UTF-8 and returns the number of
 * bytes it occupies (1..4). When pchDst is NULL nothing is written and
 * only the length is returned.
 */
static unsigned int Utf8EncodeCodePoint(unsigned long dwCodePoint, char *pchDst)
{
	if (dwCodePoint <= 0x7F)
	{
		if (NULL != pchDst)
		{
			pchDst[0] = (char)dwCodePoint;
		}
		return 1;
	}

	if (dwCodePoint <= 0x7FF)
	{
		if (NULL != pchDst)
		{
			pchDst[0] = (char)(0xC0 | (dwCodePoint >> 6));
			pchDst[1] = (char)(0x80 | (dwCodePoint & 0x3F));
		}
		return 2;
	}

	if (dwCodePoint <= 0xFFFF)
	{
		if (NULL != pchDst)
		{
			pchDst[0] = (char)(0xE0 | (dwCodePoint >> 12));
			pchDst[1] = (char)(0x80 | ((dwCodePoint >> 6) & 0x3F));
			pchDst[2] = (char)(0x80 | (dwCodePoint & 0x3F));
		}
		return 3;
	}

	if (NULL != pchDst)
	{
		pchDst[0] = (char)(0xF0 | (dwCodePoint >> 18));
		pchDst[1] = (char)(0x80 | ((dwCodePoint >> 12) & 0x3F));
		pchDst[2] = (char)(0x80 | ((dwCodePoint >> 6) & 0x3F));
		pchDst[3] = (char)(0x80 | (dwCodePoint & 0x3F));
	}
	return 4;
}

/*
 * Walks the zero-terminated UTF-16 string at pchSrc and encodes it as
 * UTF-8. With pchDst == NULL the string is only measured. dwLen receives
 * the number of UTF-8 bytes produced (terminator not included).
 * Returns 0 on success, -1 on an unpaired surrogate or length overflow.
 */
static int Utf16ToUtf8Pass(const char *pchSrc, char *pchDst, unsigned int &dwLen)
{
	dwLen = 0;

	for (;;)
	{
		unsigned long dwCodePoint = Utf16ReadUnit(pchSrc);
		pchSrc += 2;

		if (0 == dwCodePoint)	// terminator
		{
			return 0;
		}

		if (dwCodePoint >= 0xDC00 && dwCodePoint <= 0xDFFF)
		{
			return -1;	// low surrogate without a preceding high surrogate
		}

		if (dwCodePoint >= 0xD800 && dwCodePoint <= 0xDBFF)
		{
			// High surrogate: the next unit must be a low surrogate. If it is
			// the terminator instead, it is not consumed and we fail here.
			const unsigned long dwLow = Utf16ReadUnit(pchSrc);
			if (dwLow < 0xDC00 || dwLow > 0xDFFF)
			{
				return -1;
			}
			pchSrc += 2;
			dwCodePoint = 0x10000 + ((dwCodePoint - 0xD800) << 10) + (dwLow - 0xDC00);
		}

		const unsigned int dwBytes = Utf8EncodeCodePoint(dwCodePoint, pchDst);
		if (dwLen > (unsigned int)-1 - dwBytes)
		{
			return -1;	// length would not fit in unsigned int
		}

		if (NULL != pchDst)
		{
			pchDst += dwBytes;
		}
		dwLen += dwBytes;
	}
}

/*
 * Converts a zero-terminated UTF-16 string (host byte order) to UTF-8.
 *
 *   pchStrIn     input string: 16-bit code units ending with a 0x0000 unit.
 *                Need not be 2-byte aligned.
 *   pchStrOut    caller-provided output buffer.
 *   dwOutBufLen  size of pchStrOut in bytes.
 *   dwOutLen     receives the length of the UTF-8 result in bytes.
 *
 * Returns 0 on success, -1 on failure:
 *   - pchStrIn is NULL, or the input contains an unpaired surrogate:
 *     dwOutLen is 0.
 *   - pchStrOut is NULL or dwOutBufLen is too small: dwOutLen holds the
 *     number of bytes required and pchStrOut is left untouched.
 *
 * Surrogate pairs are combined and emitted as 4-byte UTF-8 sequences.
 * The output is NUL-terminated only when the buffer has a spare byte
 * (dwOutLen < dwOutBufLen); dwOutLen never counts that terminator.
 *
 * The input is scanned twice (measure, then encode) so that the result is
 * written straight into the caller's buffer. This removes the fixed-size
 * temporary buffer, which could be overrun by up to two bytes and silently
 * truncated results longer than 1024 bytes.
 */
int Unicode2Utf8( const char *pchStrIn, char *pchStrOut, unsigned int dwOutBufLen, unsigned int &dwOutLen )
{
	dwOutLen = 0;

	if (NULL == pchStrIn)
	{
		return -1;
	}

	// Pass 1: validate the input and find out how many bytes are needed.
	unsigned int dwNeeded = 0;
	if (0 != Utf16ToUtf8Pass(pchStrIn, NULL, dwNeeded))
	{
		return -1;
	}
	dwOutLen = dwNeeded;

	if (NULL == pchStrOut || dwOutLen > dwOutBufLen)
	{
		return -1;	// caller's buffer is missing or too small
	}

	// Pass 2: the result is known to fit, encode directly into the output.
	unsigned int dwWritten = 0;
	Utf16ToUtf8Pass(pchStrIn, pchStrOut, dwWritten);

	if (dwOutLen < dwOutBufLen)
	{
		pchStrOut[dwOutLen] = '\0';
	}

	return 0;
}

  • 12
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值