阿拉伯文排版规则

https://blog.csdn.net/wuxinyanzi/article/details/12912483

从开始到结束逐个转换阿拉伯文的unicode值,
由于阿拉伯文是从右到左显示的,在转换完成之后,需要将阿拉伯文的unicode序列反转一下,然后再显示出来。


/* Element counts of Arbic_First_Set and Arbic_Last_Set. */
#define ALB_SET_FIRST_LEN 23
#define ALB_SET_LAST_LEN  35

/*
 * Column indices into a row of Arbic_Position.
 * As used by Arbic_Trans:
 *   FIRST  - the letter connects to the previous letter only
 *   LAST   - the letter connects to the following letter only
 *   MIDDLE - the letter connects on both sides
 *   ALONE  - the letter connects on neither side
 */
#define FIRST  0
#define LAST   1
#define MIDDLE 2
#define ALONE  3

/* First and last code point covered by Arbic_Position (inclusive). */
#define ARBIC_START_ADDR (0x0621)
#define ARBIC_END_ADDR   (0x064A)



// Presentation-form lookup table used to convert base Arabic letters.
// One row per code point from 0x621 to 0x64A; the row index is
// (code point - 0x621).
// Column order: first, last, middle, alone
//   first  : form used when the letter connects to the previous letter only
//   last   : form used when the letter connects to the following letter only
//   middle : form used when the letter connects on both sides
//   alone  : form used when the letter connects on neither side
// Letters that never connect to the following letter repeat their "alone"
// form in the "last" column and their "first" form in the "middle" column.
// Code points 0x63b..0x640 have no presentation forms here and map to
// themselves.
const uint16_t Arbic_Position[][4] =
{
		{ 0xfe80, 0xfe80, 0xfe80, 0xfe80},   // 0x621
		{ 0xfe82, 0xfe81, 0xfe82, 0xfe81},   // 0x622
		{ 0xfe84, 0xfe83, 0xfe84, 0xfe83},   // 0x623
		{ 0xfe86, 0xfe85, 0xfe86, 0xfe85},   // 0x624
		{ 0xfe88, 0xfe87, 0xfe88, 0xfe87},   // 0x625
		{ 0xfe8a, 0xfe8b, 0xfe8c, 0xfe89},   // 0x626
		{ 0xfe8e, 0xfe8d, 0xfe8e, 0xfe8d},   // 0x627
		{ 0xfe90, 0xfe91, 0xfe92, 0xfe8f},   // 0x628
		// 0x629 never connects forward, so "middle" must be the same form as
		// "first" (0xfe94), like every other such letter in this table.
		{ 0xfe94, 0xfe93, 0xfe94, 0xfe93},   // 0x629
		{ 0xfe96, 0xfe97, 0xfe98, 0xfe95},   // 0x62A
		{ 0xfe9a, 0xfe9b, 0xfe9c, 0xfe99},   // 0x62B
		{ 0xfe9e, 0xfe9f, 0xfea0, 0xfe9d},   // 0x62C
		{ 0xfea2, 0xfea3, 0xfea4, 0xfea1},   // 0x62D
		{ 0xfea6, 0xfea7, 0xfea8, 0xfea5},   // 0x62E
		{ 0xfeaa, 0xfea9, 0xfeaa, 0xfea9},   // 0x62F
		{ 0xfeac, 0xfeab, 0xfeac, 0xfeab},   // 0x630
		{ 0xfeae, 0xfead, 0xfeae, 0xfead},   // 0x631
		{ 0xfeb0, 0xfeaf, 0xfeb0, 0xfeaf},   // 0x632
		{ 0xfeb2, 0xfeb3, 0xfeb4, 0xfeb1},   // 0x633
		{ 0xfeb6, 0xfeb7, 0xfeb8, 0xfeb5},   // 0x634
		{ 0xfeba, 0xfebb, 0xfebc, 0xfeb9},   // 0x635
		{ 0xfebe, 0xfebf, 0xfec0, 0xfebd},   // 0x636
		{ 0xfec2, 0xfec3, 0xfec4, 0xfec1},   // 0x637
		{ 0xfec6, 0xfec7, 0xfec8, 0xfec5},   // 0x638
		{ 0xfeca, 0xfecb, 0xfecc, 0xfec9},   // 0x639
		{ 0xfece, 0xfecf, 0xfed0, 0xfecd},   // 0x63A
		{ 0x63b,  0x63b,  0x63b,  0x63b},    // 0x63B
		{ 0x63c,  0x63c,  0x63c,  0x63c},    // 0x63C
		{ 0x63d,  0x63d,  0x63d,  0x63d},    // 0x63D
		{ 0x63e,  0x63e,  0x63e,  0x63e},    // 0x63E
		{ 0x63f,  0x63f,  0x63f,  0x63f},    // 0x63F
		{ 0x640,  0x640,  0x640,  0x640},    // 0x640
		{ 0xfed2, 0xfed3, 0xfed4, 0xfed1},   // 0x641
		{ 0xfed6, 0xfed7, 0xfed8, 0xfed5},   // 0x642
		{ 0xfeda, 0xfedb, 0xfedc, 0xfed9},   // 0x643
		{ 0xfede, 0xfedf, 0xfee0, 0xfedd},   // 0x644
		{ 0xfee2, 0xfee3, 0xfee4, 0xfee1},   // 0x645
		{ 0xfee6, 0xfee7, 0xfee8, 0xfee5},   // 0x646
		{ 0xfeea, 0xfeeb, 0xfeec, 0xfee9},   // 0x647
		{ 0xfeee, 0xfeed, 0xfeee, 0xfeed},   // 0x648
		{ 0xfef0, 0xfeef, 0xfef0, 0xfeef},   // 0x649
		{ 0xfef2, 0xfef3, 0xfef4, 0xfef1},   // 0x64A
};

// "First" membership list: IsIn_Arbic_First_Set() searches it, and Arbic_Trans
// applies it to the PREVIOUS character to decide whether the current letter
// connects backwards.
const uint16_t Arbic_First_Set[ALB_SET_FIRST_LEN] =
{
	0x62c, 0x62d, 0x62e, 0x647,
	0x639, 0x63a, 0x641, 0x642,
	0x62b, 0x635, 0x636, 0x637,
	0x643, 0x645, 0x646, 0x62a,
	0x644, 0x628, 0x64a, 0x633,
	0x634, 0x638, 0x626
};

// "Last" membership list: IsIn_Arbic_Last_Set() searches it, and Arbic_Trans
// applies it to the NEXT character to decide whether the current letter
// connects forwards. The first 23 entries are the same values as
// Arbic_First_Set; the remaining 12 appear only in this list.
const uint16_t Arbic_Last_Set[ALB_SET_LAST_LEN] =
{
	0x62c, 0x62d, 0x62e, 0x647,
	0x639, 0x63a, 0x641, 0x642,
	0x62b, 0x635, 0x636, 0x637,
	0x643, 0x645, 0x646, 0x62a,
	0x644, 0x628, 0x64a, 0x633,
	0x634, 0x638, 0x626,
	0x627, 0x623, 0x625, 0x622,
	0x62f, 0x630, 0x631, 0x632,
	0x648, 0x624, 0x629, 0x649
};

// Rule-2 table: combined glyphs emitted when 0x644 is directly followed by
// 0x622, 0x623, 0x625 or 0x627 (one row per follower, in that order).
// Column 0 is used when the previous character is not in Arbic_First_Set,
// column 1 when it is.
const uint16_t Arabic_Specs[4][2] =
{
	{ 0xFEF5, 0xFEF6 },   // 0x644 + 0x622
	{ 0xFEF7, 0xFEF8 },   // 0x644 + 0x623
	{ 0xFEF9, 0xFEFA },   // 0x644 + 0x625
	{ 0xFEFB, 0xFEFC },   // 0x644 + 0x627
};

/*
 * Report whether a code point belongs to one of the Arabic ranges handled
 * by this file.
 *
 * Ranges (inclusive on both ends):
 *   0x0600..0x06FF  Arabic
 *   0xFB50..0xFDFF  Arabic Presentation Forms-A
 *   0xFE70..0xFEFE  Arabic Presentation Forms-B (0xFEFF, the byte order
 *                   mark, is deliberately left out)
 *
 * The previous strict comparisons dropped the first code point of every
 * range (0x600, 0xFB50, 0xFE70) and the last of the first two (0x6FF,
 * 0xFDFF), so those characters were treated as non-Arabic.
 *
 * unicode: code point to classify.
 * Returns 1 when the code point is in one of the ranges, 0 otherwise.
 */
int Is_Arabic_Language(unsigned int unicode)
{
	if((unicode >= 0x0600) && (unicode <= 0x06ff))
	{
		return 1;
	}
	if((unicode >= 0xfb50) && (unicode <= 0xfdff))
	{
		return 1;
	}
	if((unicode >= 0xfe70) && (unicode <= 0xfefe))
	{
		return 1;
	}
	return 0;
}

/*
 * Count the run of consecutive non-Arabic characters that begins at index
 * `start`.
 *
 * unicode_str: code points to scan.
 * start:       index of the first element to examine.
 * len:         in/out counter; incremented once per non-Arabic character
 *              found. It is NOT reset here, so the caller must initialise it.
 * max_len:     number of elements in unicode_str; scanning stops there.
 *
 * Scanning ends at the first character for which Is_Arabic_Language()
 * returns non-zero, or at max_len, whichever comes first.
 */
void Cut_Out_Not_ArbicStr(unsigned int *unicode_str, unsigned int start, int *len, unsigned int max_len)
{
	unsigned int pos = start;

	while((pos < max_len) && !Is_Arabic_Language(unicode_str[pos]))
	{
		*len += 1;
		pos++;
	}
}

/* Reverse the elements unicode_str[lo..hi] (both bounds inclusive) in place. */
static void Reverse_Unicode_Range(unsigned int *unicode_str, unsigned int lo, unsigned int hi)
{
	while(lo < hi)
	{
		unsigned int tmp = unicode_str[lo];
		unicode_str[lo] = unicode_str[hi];
		unicode_str[hi] = tmp;
		lo++;
		hi--;
	}
}

/*
 * Mirror a code point string for right-to-left display, in place.
 *
 * Every Arabic character (per Is_Arabic_Language) moves to its mirrored
 * position. Each maximal run of non-Arabic characters also moves to its
 * mirrored position but keeps its internal left-to-right order.
 *
 * The result is identical to the former implementation, which copied
 * through a fixed 128-element stack buffer and therefore overflowed it for
 * unicode_len > 128. This version has no length limit.
 *
 * unicode_str: string to rewrite; a null pointer is ignored.
 * unicode_len: number of elements in unicode_str.
 */
void Reverse_Unicode(unsigned int *unicode_str, unsigned int unicode_len)
{
	unsigned int i = 0;
	unsigned int run_start;

	if(!unicode_str || (unicode_len < 2))
	{
		return;
	}

	// Step 1: mirror the whole string.
	Reverse_Unicode_Range(unicode_str, 0, unicode_len - 1);

	// Step 2: each non-Arabic run is now at the right place but reads
	// backwards, so flip every such run back.
	while(i < unicode_len)
	{
		if(Is_Arabic_Language(unicode_str[i]))
		{
			i++;
			continue;
		}

		run_start = i;
		while((i < unicode_len) && !Is_Arabic_Language(unicode_str[i]))
		{
			i++;
		}
		Reverse_Unicode_Range(unicode_str, run_start, i - 1);
	}
}

/*
 * Membership test against Arbic_First_Set.
 *
 * value: code point to look up.
 * Returns 1 if value equals one of the ALB_SET_FIRST_LEN entries, else 0.
 */
int IsIn_Arbic_First_Set(unsigned int value)
{
	int found = 0;
	int idx = 0;

	// Linear scan that stops at the first match.
	while(!found && (idx < ALB_SET_FIRST_LEN))
	{
		found = (Arbic_First_Set[idx] == value);
		idx++;
	}
	return found;
}

/*
 * Membership test against Arbic_Last_Set.
 *
 * value: code point to look up.
 * Returns 1 if value equals one of the ALB_SET_LAST_LEN entries, else 0.
 */
int IsIn_Arbic_Last_Set(unsigned int value)
{
	int found = 0;
	int idx = 0;

	// Linear scan that stops at the first match.
	while(!found && (idx < ALB_SET_LAST_LEN))
	{
		found = (Arbic_Last_Set[idx] == value);
		idx++;
	}
	return found;
}

/*
 * Decide whether a code point has a row in the Arbic_Position table.
 *
 * value: code point to test.
 * Returns 1 when Is_Arabic_Language(value) is true and value lies in
 * ARBIC_START_ADDR..ARBIC_END_ADDR (inclusive); otherwise 0.
 */
int Is_Need_Trans(unsigned int value)
{
	if(!Is_Arabic_Language(value))
	{
		return 0;
	}
	if(value < ARBIC_START_ADDR)
	{
		return 0;
	}
	if(value > ARBIC_END_ADDR)
	{
		return 0;
	}
	return 1;
}


/*
 * Map the character that follows 0x644 to its row in Arabic_Specs.
 * Returns 0..3 for 0x622, 0x623, 0x625, 0x627 respectively, or -1 when the
 * pair does not form a combined glyph.
 */
static int Arbic_LamAlef_Row(unsigned int next)
{
	switch(next)
	{
		case 0x622: return 0;
		case 0x623: return 1;
		case 0x625: return 2;
		case 0x627: return 3;
		default:    return -1;
	}
}

/*
 * Convert base Arabic letters to their contextual presentation forms.
 * Rules follow https://blog.csdn.net/wuxinyanzi/article/details/12912483
 *
 * For every character that Is_Need_Trans() accepts, the output form is
 * chosen from Arbic_Position according to its neighbours:
 *   - it connects backwards when a previous character exists and is in
 *     Arbic_First_Set;
 *   - it connects forwards when a next character exists and is in
 *     Arbic_Last_Set.
 * Both -> MIDDLE, backwards only -> FIRST, forwards only -> LAST,
 * neither -> ALONE.
 *
 * Rule 2: 0x644 directly followed by 0x622/0x623/0x625/0x627 is replaced by
 * a single glyph from Arabic_Specs and the follower is consumed, so the
 * output can be shorter than the input.
 *
 * Characters that need no conversion are copied unchanged. When value_len
 * is 1 nothing is converted (the single character is copied as is).
 *
 * Fixes over the previous version:
 *   - no read past the end of unicode_str when a convertible letter is the
 *     last element and is either first or preceded by a non-Arabic
 *     character;
 *   - rule 2 is also applied when 0x644 is the first character or follows a
 *     non-Arabic character (it was skipped in that position before).
 *
 * unicode_str:   input code points (logical order).
 * value_len:     number of elements in unicode_str.
 * alb_trans_str: output buffer; must have room for value_len elements.
 * alb_trans_len: receives the number of elements written.
 *
 * If alb_trans_len is null nothing is done; if either buffer is null the
 * length is set to 0 and nothing is written.
 */
void Arbic_Trans(unsigned int *unicode_str, unsigned int value_len, unsigned int *alb_trans_str, unsigned int *alb_trans_len)
{
	unsigned int i;
	unsigned int cur;
	int has_next;
	int joins_prev;
	int joins_next;
	int spec_row;
	int form;

	if(!alb_trans_len)
	{
		return;
	}
	*alb_trans_len = 0;
	if(!unicode_str || !alb_trans_str)
	{
		return;
	}

	for(i = 0; i < value_len; i++)
	{
		cur = unicode_str[i];

		// Not a convertible letter, or a one-character string: copy through.
		if(!Is_Need_Trans(cur) || (value_len <= 1))
		{
			alb_trans_str[(*alb_trans_len)++] = cur;
			continue;
		}

		// Neighbours are only inspected when they exist, which keeps every
		// access inside [0, value_len).
		has_next   = ((i + 1) < value_len);
		joins_prev = (i > 0) && IsIn_Arbic_First_Set(unicode_str[i-1]);
		joins_next = has_next && IsIn_Arbic_Last_Set(unicode_str[i+1]);

		// Rule 2: 0x644 plus one of the four followers becomes one glyph.
		spec_row = -1;
		if((cur == 0x644) && has_next)
		{
			spec_row = Arbic_LamAlef_Row(unicode_str[i+1]);
		}
		if(spec_row >= 0)
		{
			alb_trans_str[(*alb_trans_len)++] = Arabic_Specs[spec_row][joins_prev ? 1 : 0];
			i++;   // the follower has been consumed by the combined glyph
			continue;
		}

		if(joins_prev && joins_next)
		{
			form = MIDDLE;
		}
		else if(joins_prev)
		{
			form = FIRST;
		}
		else if(joins_next)
		{
			form = LAST;
		}
		else
		{
			form = ALONE;
		}
		alb_trans_str[(*alb_trans_len)++] = Arbic_Position[cur - ARBIC_START_ADDR][form];
	}
}
  • 5
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值