UTF-8 与 UNICODE(UCS-2)的相互转换

/*
 * Length-tagged string exchanged with the UTF-8 converters in this file.
 * The string is not required to be NUL-terminated; `length` is authoritative.
 */
typedef struct
{
    uint16   length;   /* size of the data at str_ptr, counted in bytes          */
    BOOLEAN  is_ucs2;  /* TRUE: str_ptr holds 2-byte UCS-2 units, high byte      */
                       /* first; FALSE: one byte per character                   */
    uint8   *str_ptr;  /* raw string bytes                                       */
} MMI_STRING_T;
/*
 * UTF-8 byte buffer together with its length.
 */
typedef struct
{
    uint32  len;      /* number of bytes stored at buf_ptr */
    uint8  *buf_ptr;  /* UTF-8 encoded bytes               */
} UTF8_T;

/*
 * Bounds-checked memory copy.
 *
 *   _DEST_PTR / _DEST_LEN : destination buffer and its capacity in bytes
 *   _SRC_PTR  / _SRC_LEN  : source buffer and its size in bytes
 *   _SIZE                 : number of bytes to copy
 *
 * Asserts (via _PASSERT) that _SIZE fits in both buffers, then performs the
 * copy. The body is wrapped in do { } while (0) so the macro expands to a
 * single statement and stays correct inside an unbraced if/else.
 *
 * NOTE(review): a macro is never re-expanded inside its own expansion, so the
 * inner three-argument _MEMCPY(...) refers to a function (or other symbol) of
 * that name which must be provided elsewhere - confirm against the platform
 * headers. The assertion text mentions "MMI_MEMCPY", which suggests the
 * original names carried prefixes that were stripped.
 * NOTE(review): identifiers starting with '_' followed by an upper-case letter
 * are reserved in C; the name is kept only to preserve the existing interface.
 * Arguments are evaluated more than once - do not pass expressions with side
 * effects.
 */
#define _MEMCPY(_DEST_PTR, _DEST_LEN, _SRC_PTR, _SRC_LEN, _SIZE) \
    do { \
        _PASSERT((_SIZE) <= (_DEST_LEN), ("MMI_MEMCPY: the _DEST_LEN is too small!")); \
        _PASSERT((_SIZE) <= (_SRC_LEN),  ("MMI_MEMCPY: the _SRC_LEN is too small!")); \
        _MEMCPY((_DEST_PTR), (_SRC_PTR), (_SIZE)); \
    } while (0)


/*****************************************************************************/
//  Description : convert an MMI string (single-byte text or UCS-2) to UTF-8
//  Param       : str_src_ptr - source string; `length` is its size in bytes.
//                When is_ucs2 is set, str_ptr is read as 2-byte units with
//                the high byte first; a trailing odd byte is ignored.
//  Return      : UTF8_T whose buf_ptr is a newly allocated, zero-filled
//                buffer and whose len is the number of bytes written.
//                Returns {0, 0} when the input pointer, its str_ptr, or its
//                length is null/zero.
//  Note        : Only UCS-2 is handled (at most 3 UTF-8 bytes per unit);
//                UCS-4 / surrogate pairs are not combined.
//                NOTE(review): the buffer comes from ALLOC(); presumably the
//                caller must release it - confirm the matching free routine.
/*****************************************************************************/
UTF8_T MMS_UNICODE2UTF(MMI_STRING_T *str_src_ptr)
{
    UTF8_T  utf8_str = {0};
    uint8  *out_ptr = PNULL;
    uint32  out_len = 0;
    uint32  char_count = 0;   // number of complete 2-byte units in the source
    uint32  i = 0;            // index of the current UCS-2 unit
    uint32  j = 0;            // number of UTF-8 bytes written so far
    uint16  ucs2_char = 0;

    if (str_src_ptr == PNULL || str_src_ptr->str_ptr == PNULL || str_src_ptr->length == 0)
    {
        // utf8_str is already {0}: null buffer, zero length
        return utf8_str;
    }

    // Worst case is 3 output bytes per UCS-2 unit; the extra 3 bytes leave
    // zeroed slack at the end and also cover the single-byte copy below.
    char_count = str_src_ptr->length / 2;
    out_len = char_count * 3 + 3;
    out_ptr = ALLOC(out_len);
    _ASSERT(out_ptr != PNULL);
    _MEMSET(out_ptr, 0, out_len);

    if (!str_src_ptr->is_ucs2)
    {
        // Single-byte text: copied through unchanged.
        // NOTE(review): bytes above 0x7F would not be valid UTF-8 - this path
        // assumes the source really is 7-bit ASCII.
        _MEMCPY(
            out_ptr,
            out_len,
            str_src_ptr->str_ptr,
            str_src_ptr->length,
            str_src_ptr->length
            );
        utf8_str.buf_ptr = out_ptr;
        utf8_str.len = str_src_ptr->length;
        return utf8_str;
    }

    for (i = 0; i < char_count; i++)
    {
        // Assemble one UCS-2 unit, high byte first
        ucs2_char = (uint16)((str_src_ptr->str_ptr[2 * i] << 8) | str_src_ptr->str_ptr[2 * i + 1]);

        if (ucs2_char <= 0x7F)
        {
            // 0xxx xxxx
            out_ptr[j++] = (uint8)(ucs2_char & 0x7F);
        }
        else if (ucs2_char <= 0x7FF)
        {
            // 110x xxxx 10xx xxxx : 11 bits split into 5 + 6
            out_ptr[j++] = (uint8)(0xC0 | ((ucs2_char & 0x7C0) >> 6));
            out_ptr[j++] = (uint8)(0x80 | (ucs2_char & 0x3F));
        }
        else
        {
            // 1110 xxxx 10xx xxxx 10xx xxxx : 16 bits split into 4 + 6 + 6
            out_ptr[j++] = (uint8)(0xE0 | ((ucs2_char & 0xF000) >> 12));
            out_ptr[j++] = (uint8)(0x80 | ((ucs2_char & 0xFC0) >> 6));
            out_ptr[j++] = (uint8)(0x80 | (ucs2_char & 0x3F));
        }
    }

    // Sanity check: j is at most char_count * 3, which is below out_len
    _ASSERT(j <= out_len);

    utf8_str.buf_ptr = out_ptr;
    utf8_str.len = j;
    return utf8_str;
}

/*****************************************************************************/
//  Description : convert a UTF-8 buffer to an MMI string
//  Param       : utf8_str_ptr - source buffer; must not be PNULL, and its
//                buf_ptr must not be PNULL when len is non-zero.
//  Return      : MMI_STRING_T with a newly allocated buffer.
//                - If every byte decoded was <= 0x7F, is_ucs2 is FALSE and the
//                  buffer holds a verbatim copy of the input (length = len).
//                - Otherwise is_ucs2 is TRUE and the buffer holds 2-byte
//                  units, high byte first (length = bytes written).
//                The bytes after the payload are zero.
//  Note        : Only 1-, 2- and 3-byte sequences are decoded. A sequence
//                that is cut off by the end of the input is dropped.
//                Continuation bytes are not validated.
//                NOTE(review): a lead byte >= 0xF0 stops decoding; if all
//                bytes before it were ASCII the result is still reported as
//                non-UCS-2 and the whole raw input is copied through.
//                NOTE(review): bytes 0x80..0xBF in lead position are treated
//                as 2-byte lead bytes - confirm whether that is intended.
//                NOTE(review): MMI_STRING_T.length is uint16, so results
//                longer than 65535 bytes have their length truncated.
//                NOTE(review): the buffer comes from SCI_ALLOC(); presumably
//                the caller must release it - confirm the matching free call.
/*****************************************************************************/
MMI_STRING_T MMS_UTF2UNICODE(UTF8_T *utf8_str_ptr)
{
    MMI_STRING_T str_out = {0};
    uint8   *ucs2_ptr = PNULL;
    uint32   out_len = 0;
    uint32   src_len = 0;
    uint32   i = 0;            // read position in the UTF-8 input
    uint32   j = 0;            // write position in the UCS-2 output
    uint8    head_char = 0;
    uint16   ucs2_char = 0;
    BOOLEAN  is_asc = TRUE;    // stays TRUE while only 1-byte sequences are seen

    _ASSERT(utf8_str_ptr != PNULL);
    src_len = utf8_str_ptr->len;
    // The loop below dereferences buf_ptr whenever there is input to read
    _ASSERT(src_len == 0 || utf8_str_ptr->buf_ptr != PNULL);

    // Each input byte yields at most one 2-byte unit; 2 extra bytes stay zero
    out_len = src_len * 2 + 2;
    ucs2_ptr = SCI_ALLOC(out_len);
    _ASSERT(ucs2_ptr != PNULL);
    _MEMSET(ucs2_ptr, 0, out_len);

    while (i < src_len)
    {
        head_char = utf8_str_ptr->buf_ptr[i];

        if (head_char <= 0x7F)
        {
            // 0xxx xxxx
            ucs2_ptr[j] = 0;
            ucs2_ptr[j + 1] = head_char;
            j += 2;
            i += 1;
        }
        else if (head_char <= 0xDF)
        {
            // 110x xxxx 10xx xxxx - decode only if the second byte exists
            if (i + 1 < src_len)
            {
                ucs2_char = (uint16)(((head_char & 0x1F) << 6)
                                     | (utf8_str_ptr->buf_ptr[i + 1] & 0x3F));
                ucs2_ptr[j] = (uint8)((ucs2_char & 0xFF00) >> 8);
                ucs2_ptr[j + 1] = (uint8)(ucs2_char & 0xFF);
                j += 2;
            }
            i += 2;
            is_asc = FALSE;
        }
        else if (head_char <= 0xEF)
        {
            // 1110 xxxx 10xx xxxx 10xx xxxx - decode only if all 3 bytes exist
            if (i + 2 < src_len)
            {
                ucs2_char = (uint16)(((head_char & 0xF) << 12)
                                     | ((utf8_str_ptr->buf_ptr[i + 1] & 0x3F) << 6)
                                     | (utf8_str_ptr->buf_ptr[i + 2] & 0x3F));
                ucs2_ptr[j] = (uint8)((ucs2_char & 0xFF00) >> 8);
                ucs2_ptr[j + 1] = (uint8)(ucs2_char & 0xFF);
                j += 2;
            }
            i += 3;
            is_asc = FALSE;
        }
        else
        {
            // 4-byte (or longer) sequences do not fit in UCS-2: stop decoding
            break;
        }
    }

    if (is_asc)
    {
        // The loop has already filled the buffer with 00 xx pairs. Clear it
        // first so no stale bytes follow the copied text and the result stays
        // zero-terminated.
        _MEMSET(ucs2_ptr, 0, out_len);
        _MEMCPY(
            ucs2_ptr,
            out_len,
            utf8_str_ptr->buf_ptr,
            src_len,
            src_len
            );
        str_out.is_ucs2 = FALSE;
        str_out.length = (uint16)src_len;
    }
    else
    {
        str_out.is_ucs2 = TRUE;
        str_out.length = (uint16)j;
    }
    str_out.str_ptr = ucs2_ptr;

    return str_out;
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值