UTF-8与Unicode互相转换

UTF-8与Unicode互相转换

根据如下表格

| Unicode | UTF-8 |
| --- | --- |
| U+0000 - U+007F | 0xxxxxxx (1个字节) |
| U+0080 - U+07FF | 110xxxxx 10xxxxxx (2个字节) |
| U+0800 - U+FFFF | 1110xxxx 10xxxxxx 10xxxxxx (3个字节) |
| U+10000 - U+10FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (4个字节) |

编写如下C语言代码

// One decoded Unicode code point, as stored by getFromBytes.
// NOTE(review): the C standard only guarantees 16 bits for plain `unsigned`;
// code points up to U+10FFFF need 21 bits — confirm the target platform has
// a 32-bit unsigned int.
typedef unsigned uchar;
// Growable array of code points.
typedef struct {
  uchar* buffer;              // heap-allocated storage for the code points
  unsigned long long length;  // number of code points currently stored in buffer
  unsigned long long size;    // capacity of buffer, counted in uchar elements
} ustring;

// utf-8转unicode
ustring getFromBytes(char* bytes, void errorHandler(char* errorMsg)) {
  ustring string;
  string.buffer = (uchar*)malloc(sizeof(uchar) * INIT_STRING_SIZE);
  string.size = INIT_STRING_SIZE;
  string.length = 0;
  size_t bytesSize = strlen(bytes);

  for (size_t ptr = 0; ptr < bytesSize; ptr++) {
    uchar ch = 0;
    unsigned char byte = bytes[ptr];

    if ((byte & 0xf0) == 0xf0) {
      if (ptr + 3 >= bytesSize) {
        free(string.buffer);
        string.buffer = NULL;
        errorHandler(UTF8_ERROR_MSG);
      }
      ch = ((byte & 7) << 18) + ((bytes[ptr + 1] & 0x3f) << 12) + ((bytes[ptr + 2] & 0x3f) << 6) + (bytes[ptr + 3] & 0x3f);
      ptr += 3;
    } else if ((byte & 0xe0) == 0xe0) {
      if (ptr + 2 >= bytesSize) {
        free(string.buffer);
        string.buffer = NULL;
        errorHandler(UTF8_ERROR_MSG);
      }
      ch = ((byte & 0xf) << 12) + ((bytes[ptr + 1] & 0x3f) << 6) + (bytes[ptr + 2] & 0x3f);
      ptr += 2;
    } else if ((byte & 0xc0) == 0xc0) {
      if (ptr + 1 >= bytesSize) {
        free(string.buffer);
        string.buffer = NULL;
        errorHandler(UTF8_ERROR_MSG);
      }
      ch = ((byte & 0x1f) << 6) + (bytes[ptr + 1] & 0x3f);
      ptr++;
    } else if (byte < 0x80) {
      ch = byte;
    } else {
      free(string.buffer);
      string.buffer = NULL;
      errorHandler(UTF8_ERROR_MSG);
    }

    if (string.length >= string.size) {
      string.size << 1;
      uchar* buffer = (uchar*)malloc(sizeof(uchar) * string.size);
      memcpy(buffer, string.buffer, string.length);
      free(string.buffer);
      string.buffer = buffer;
    }
    string.buffer[string.length++] = ch;
  }

  return string;
}

// Unicode -> UTF-8
//
// Writes the UTF-8 encoding of the code point `ch` to stdout.
// Values below 0x80 are emitted as a single byte with putchar; values up to
// 0x10FFFF are emitted as a 2-, 3- or 4-byte sequence; anything larger
// produces no output.
void printuchar(uchar ch) {
  if (ch < 0x80) {
    putchar(ch);
    return;
  }
  if (ch >= 0x110000) {
    return;
  }

  char encoded[5];
  int used = 0;

  // Lead byte plus any middle continuation bytes; the final continuation
  // byte has the same form for every length and is appended below.
  if (ch < 0x800) {
    encoded[used++] = (char)(0xc0 | (ch >> 6));
  } else if (ch < 0x10000) {
    encoded[used++] = (char)(0xe0 | (ch >> 12));
    encoded[used++] = (char)(0x80 | ((ch >> 6) & 0x3f));
  } else {
    encoded[used++] = (char)(0xf0 | (ch >> 18));
    encoded[used++] = (char)(0x80 | ((ch >> 12) & 0x3f));
    encoded[used++] = (char)(0x80 | ((ch >> 6) & 0x3f));
  }
  encoded[used++] = (char)(0x80 | (ch & 0x3f));
  encoded[used] = 0;

  printf("%s", encoded);
}
  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值