Windows kernel中实现Unicode和UTF-8之间的转换

Windows kernel中不存在Unicode/UTF-8转换函数, 因此通过分析UTF-8编码表自己实现转换函数

UTF-8编码字符理论上可以最多到6个字节长,然而16位BMP(Basic Multilingual Plane)字符最多只用到3字节长。下面看一下UTF-8编码表:

U-00000000 - U-0000007F: 0xxxxxxx
U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 

/*
 * Named thresholds used by the UTF-8 <-> Unicode converters.
 * The values mirror the boundaries in the UTF-8 encoding table.
 */
enum Hex
{
	/* Byte-level boundaries (lead / continuation byte classification). */
	Hex80 = 0x80,
	HexC0 = 0xC0,
	HexE0 = 0xE0,
	HexF0 = 0xF0,
	HexF8 = 0xF8,
	HexFC = 0xFC,
	HexFE = 0xFE,
	HexFF = 0xFF,

	/* Code-point boundaries (first value needing one more UTF-8 byte). */
	Hex800 = 0x800,
	Hex10000 = 0x10000,
	Hex200000 = 0x200000,
	Hex4000000 = 0x4000000,
	/*
	 * NOTE(review): 0x80000000 is not representable as a 32-bit int, so this
	 * enumerator relies on a compiler extension before C23 - confirm the
	 * target compiler accepts it as intended.
	 */
	Hex80000000 = 0x80000000
};

/*
 * Decode UTF-8 bytes into 16-bit Unicode characters (1- to 3-byte
 * sequences only, i.e. the Basic Multilingual Plane).
 *
 * des:    Output buffer that receives the decoded characters.
 * src:    Input buffer holding the UTF-8 bytes.
 * size_d: Capacity of des, counted in characters (not bytes).
 * size_s: Number of bytes of src to convert.
 *
 * Decoding stops when the input is exhausted, when the output buffer is
 * full, or when a multi-byte sequence is cut off by the end of the input.
 * No terminator is appended and the number of characters written is not
 * reported to the caller.
 *
 * Stray continuation bytes and lead bytes of 4-byte-or-longer sequences
 * are skipped without producing output. Continuation bytes are not
 * validated, so malformed input can yield meaningless characters.
 */
VOID UTF82Unicode(PWCHAR des, PBYTE src, int size_d, int size_s)
{
	int s = 0, d = 0;

	if(!des || !src)
		return;

	// Exactly one branch runs per iteration, so the loop condition
	// re-validates both indices before src[s] or des[d] is touched again.
	while(s < size_s && d < size_d)
	{
		if(src[s] < Hex80)
		{
			// 1 byte UTF8: 0xxxxxxx
			des[d++] = src[s++];
		}
		else if(src[s] < HexC0)
		{
			// Continuation byte (10xxxxxx). Either it trails a sequence
			// whose lead byte was already decoded, or it is stray; in both
			// cases it carries nothing to emit on its own.
			s++;
		}
		else if(src[s] < HexE0)
		{
			// 2 bytes UTF8: 110xxxxx 10xxxxxx
			if(s + 1 >= size_s)
				break;
			des[d] = (src[s] & 0x1F) << 6;
			des[d] |= src[s+1] & 0x3F;
			d++;
			// Step past the lead byte only; the trailing byte is consumed
			// by the continuation branch, so decoding resynchronizes if
			// the next byte is not actually a continuation byte.
			s++;
		}
		else if(src[s] < HexF0)
		{
			// 3 bytes UTF8: 1110xxxx 10xxxxxx 10xxxxxx
			if(s + 2 >= size_s)
				break;
			des[d] = (src[s] & 0x0F) << 12;
			des[d] |= (src[s+1] & 0x3F) << 6;
			des[d] |= src[s+2] & 0x3F;
			d++;
			// Trailing bytes are consumed by the continuation branch.
			s++;
		}
		else
		{
			// Lead byte of a 4-byte or longer sequence (or an invalid
			// byte). It cannot be stored in a single 16-bit character, so
			// skip it; without this branch s would never advance and the
			// loop would spin forever.
			s++;
		}
	}
}

/*
 * Encode 16-bit Unicode characters as UTF-8 (1 to 3 bytes per character).
 *
 * des:    Output buffer that receives the UTF-8 bytes.
 * src:    Input buffer holding the Unicode characters.
 * size_d: Capacity of des, in bytes.
 * size_s: Number of characters of src to convert.
 *
 * Encoding stops when the input is exhausted or when a multi-byte
 * sequence cannot fit into des together with at least one spare byte.
 * No terminator is appended and the number of bytes written is not
 * reported to the caller.
 *
 * Every character at or above 0x800 is written as a 3-byte sequence;
 * surrogate pairs are not combined into a single 4-byte sequence.
 */
VOID Unicode2UTF8(PBYTE des, PWCHAR src, int size_d, int size_s)
{
	int s = 0, d = 0;

	if(!des || !src)
		return;

	// Exactly one branch runs per iteration, so src[s] is never read
	// after s has been advanced without re-checking s < size_s.
	while(s < size_s && d < size_d)
	{
		if(src[s] < Hex80)
		{
			// 1 Byte UTF-8: 0xxxxxxx
			des[d] = (BYTE)src[s];
			d++;
			s++;
		}
		else if(src[s] < Hex800)
		{
			// 2 Bytes UTF-8: 110xxxxx 10xxxxxx
			if(d + 1 >= size_d)
				break;
			des[d] = (BYTE)(0xC0 | (src[s] >> 6));
			des[d+1] = (BYTE)(0x80 | (src[s] & 0x003F));
			d += 2;
			s++;
		}
		else
		{
			// 3 Bytes UTF-8: 1110xxxx 10xxxxxx 10xxxxxx
			// Assumes source characters are 16 bits wide, so every
			// remaining value lies in 0x0800..0xFFFF.
			if(d + 2 >= size_d)
				break;
			des[d] = (BYTE)(0xE0 | (src[s] >> 12));
			des[d+1] = (BYTE)(0x80 | ((src[s] >> 6) & 0x003F));
			des[d+2] = (BYTE)(0x80 | (src[s] & 0x003F));
			d += 3;
			s++;
		}
	}
}


  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值