纯 C 实现 UTF-8 与 UCS-2 相互转换

网上的很多代码都只支持单个字符的转码,这里整理了一份可以转换整段缓冲区的实现,分享出来,欢迎指正。

UCS-2:每个字符固定占 2 字节的 Unicode 编码(只能表示 U+0000 ~ U+FFFF)
UTF-8:变长多字节编码(下面的代码处理 1 ~ 3 字节的序列)

/**
 * Convert a buffer of UCS-2 code units to UTF-8.
 *
 * Each consecutive pair of input bytes is interpreted as one 16-bit code
 * unit stored in the host's native byte order (the same layout that
 * utf8_to_ucs2() produces).
 *
 * @param in    Input bytes; may be unaligned.
 * @param ilen  Length of `in` in bytes. A trailing odd byte is ignored.
 * @param out   Destination for the UTF-8 bytes. No NUL terminator is added.
 * @param olen  Capacity of `out` in bytes.
 * @return Number of bytes written to `out`; 0 when `in` or `out` is NULL;
 *         -1 when `out` is too small for the next character (bytes written
 *         before the failure remain in `out`).
 *
 * Every code unit >= 0x0800 is emitted as a 3-byte sequence; surrogate
 * values are not paired or rejected.
 */
int ucs2_to_utf8(const unsigned char *in, int ilen, unsigned char *out, int olen)
{
    int length = 0;
    int i;

    if (!out || !in) {
        return 0;
    }

    /* `ilen - i >= 2` guarantees a full code unit and cannot overflow. */
    for (i = 0; ilen - i >= 2; i += 2) {
        /*
         * Load the code unit byte-by-byte instead of casting `in` to
         * `unsigned short *`: the buffer may be misaligned, and the cast
         * would violate strict aliasing. Assumes a 2-byte unsigned short,
         * as the original code did.
         */
        unsigned short ucs2_code = 0;
        unsigned char *code_bytes = (unsigned char *)&ucs2_code;
        unsigned char *pout = out + length;

        code_bytes[0] = in[i];
        code_bytes[1] = in[i + 1];

        if (ucs2_code < 0x0080) {
            /* 1-byte UTF-8 character: 0xxxxxxx */
            if (olen - length < 1) {
                return -1;
            }
            /* Use the decoded value, not the first byte in memory, so the
             * result is also correct on big-endian hosts. */
            pout[0] = (unsigned char)ucs2_code;
            length += 1;
        } else if (ucs2_code < 0x0800) {
            /* 2-byte UTF-8 character: 110xxxxx 10xxxxxx */
            if (olen - length < 2) {
                return -1;
            }
            pout[0] = (unsigned char)(0xC0 | (ucs2_code >> 6));
            pout[1] = (unsigned char)(0x80 | (ucs2_code & 0x003F));
            length += 2;
        } else {
            /* 3-byte UTF-8 character: 1110xxxx 10xxxxxx 10xxxxxx */
            if (olen - length < 3) {
                return -1;
            }
            pout[0] = (unsigned char)(0xE0 | (ucs2_code >> 12));
            pout[1] = (unsigned char)(0x80 | ((ucs2_code >> 6) & 0x003F));
            pout[2] = (unsigned char)(0x80 | (ucs2_code & 0x003F));
            length += 3;
        }
    }

    return length;
}

/**
 * Convert a UTF-8 buffer to UCS-2 code units.
 *
 * Each decoded character is stored in `out` as a 16-bit value in the
 * host's native byte order (copied with memcpy, so `out` may be unaligned).
 *
 * @param in    UTF-8 input bytes.
 * @param ilen  Length of `in` in bytes.
 * @param out   Destination buffer for the UCS-2 code units.
 * @param olen  Capacity of `out` in bytes.
 * @return Number of bytes written to `out` (always a multiple of 2);
 *         0 when `in` or `out` is NULL.
 *
 * Conversion stops silently, returning what was written so far, when:
 *  - `out` has fewer than 2 bytes left;
 *  - the input holds a byte sequence this function does not decode
 *    (stray continuation byte, bad continuation, 4-byte lead byte, ...);
 *  - a multi-byte sequence is cut off by the end of the input.
 */
int utf8_to_ucs2(const unsigned char *in, int ilen, unsigned char *out,
  int olen) {
    const unsigned char *cur;
    const unsigned char *end;
    int ret = 0;

    if (!in || !out || ilen <= 0) {
        return 0;
    }

    cur = in;
    end = in + ilen;

    while (cur < end && olen - ret >= 2) {
        /* Bytes left in the input; bounded by ilen, so it fits in an int. */
        int avail = (int)(end - cur);
        unsigned short ucs2_code;
        int consumed;

        if ((cur[0] & 0x80) == 0x00) {
            /* 1-byte UTF-8 character: 0xxxxxxx */
            ucs2_code = (unsigned short)cur[0];
            consumed = 1;
        } else if ((cur[0] & 0xE0) == 0xC0 && avail >= 2
                   && (cur[1] & 0xC0) == 0x80) {
            /* 2-byte UTF-8 character: 110xxxxx 10xxxxxx.
             * `avail` is checked before cur[1] is read so a truncated
             * sequence never reads past the end of the input. */
            ucs2_code = (unsigned short)(((cur[0] & 0x1F) << 6)
                                         | (cur[1] & 0x3F));
            consumed = 2;
        } else if ((cur[0] & 0xF0) == 0xE0 && avail >= 3
                   && (cur[1] & 0xC0) == 0x80
                   && (cur[2] & 0xC0) == 0x80) {
            /* 3-byte UTF-8 character: 1110xxxx 10xxxxxx 10xxxxxx */
            ucs2_code = (unsigned short)(((cur[0] & 0x0F) << 12)
                                         | ((cur[1] & 0x3F) << 6)
                                         | (cur[2] & 0x3F));
            consumed = 3;
        } else {
            /* Undecodable or truncated sequence: stop here. */
            break;
        }

        memcpy(out + ret, &ucs2_code, 2);
        ret += 2;
        cur += consumed;
    }

    return ret;
}
  • 1
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值