UTF-8 转 UNICODE , C 代码 , 自用 , 转发注明出处

UTF-8 转 UNICODE , C 代码 , 自用 , 转发注明出处


2020/12/5 更新:修改了 4 字节 UTF-8 序列的转换代码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "stdint.h"
#include "stdlib.h"
#include "string.h"
#include "stdio.h"
#pragma warning(disable:4996)

#define UNICODE_1_BYTE_MASK 0x80
#define UNICODE_1_BYTE_MASK_VALUE 0X00
#define UNICODE_2_BYTE_MASK 0xe0
#define UNICODE_2_BYTE_MASK_VALUE 0XC0
#define UNICODE_3_BYTE_MASK 0xf0
#define UNICODE_3_BYTE_MASK_VALUE 0XE0
#define UNICODE_4_BYTE_MASK 0xf0
#define UNICODE_4_BYTE_MASK_VALUE 0XF0
#define UNICODE_COMMON_MASK 0XC0
#define UNICODE_COMMON_MASK_VALUE 0X80


	 void toUnicode(char* a, char* r) {
		 char* temp;
		 temp = (char*)malloc(strlen(a)*6);
		 if (temp == NULL) {
			 return;
		 }
		 uint32_t index = 0;
		 uint32_t index2 = 0;
		 uint32_t unicode = 0;
		 uint32_t len = 0;
		 while(*(uint8_t *)(a+index2)!=0) {
			 
			 if ((*(uint8_t*)(a + index2) & (UNICODE_1_BYTE_MASK)) == UNICODE_1_BYTE_MASK_VALUE) {// 1byte
				 printf(" %02x \n", *(uint8_t*)(a + index2));
				 unicode = *(uint8_t*)(a + index2);
				 len = sprintf(temp + index, "\\u%04x", unicode);
				 index2 += 1;
			 }
			 else if ((*(uint8_t*)(a + index2) & (UNICODE_2_BYTE_MASK)) == UNICODE_2_BYTE_MASK_VALUE) {//2byte
				 printf(" %02x %02x \n", *(uint8_t*)(a + index2) , *(uint8_t*)(a + index2+1));
				 if ((*(uint8_t*)(a + index2 + 1) & (UNICODE_COMMON_MASK)) == UNICODE_COMMON_MASK_VALUE) {
					 unicode = (((*(uint8_t*)(a + index2)) & 0x1f) << 6) | ((*(uint8_t*)(a + index2+1)) & 0x3f);
					 len = sprintf(temp + index, "\\u%04x", unicode);
					 index2 += 2;
				 }
				 else {
					 printf("error 2");
					 break;
					 
					 //errors
				 }
			 }
			 else if ((*(uint8_t*)(a + index2) & (UNICODE_3_BYTE_MASK)) == UNICODE_3_BYTE_MASK_VALUE) { //3byte
				 printf(" %02x %02x %02x \n", *(uint8_t*)(a + index2), *(uint8_t*)(a + index2 + 1), *(uint8_t*)(a + index2 + 2));
				 if (((*(uint8_t*)(a + index2 + 1) & (UNICODE_COMMON_MASK)) == UNICODE_COMMON_MASK_VALUE) &&((*(uint8_t*)(a + index2 + 2) & (UNICODE_COMMON_MASK)) == UNICODE_COMMON_MASK_VALUE)){
					 unicode = (((*(uint8_t*)(a + index2)) & 0x0f) << 12) | (((*(uint8_t*)(a + index2 + 1)) & 0x3f)<<6)| ((*(uint8_t*)(a + index2 + 2)) & 0x3f) ;
					 len = sprintf(temp + index, "\\u%04x", unicode);
					 index2 += 3;
				 }
				 else {
					 printf("error 3");
					 break;
					 //errors
				 }
			 }
			 else if ((*(uint8_t*)(a + index2) & (UNICODE_4_BYTE_MASK)) == UNICODE_4_BYTE_MASK_VALUE) {
				 printf(" %02x %02x %02x %02x\n", *(uint8_t*)(a + index2), *(uint8_t*)(a + index2 + 1), *(uint8_t*)(a + index2 + 2), *(uint8_t*)(a + index2 + 3));
				 if (((*(uint8_t*)(a + index2 + 1) & (UNICODE_COMMON_MASK)) == UNICODE_COMMON_MASK_VALUE) && ((*(uint8_t*)(a + index2 + 2) & (UNICODE_COMMON_MASK)) == UNICODE_COMMON_MASK_VALUE) && ((*(uint8_t*)(a + index2 + 3) & (UNICODE_COMMON_MASK)) == UNICODE_COMMON_MASK_VALUE)) {
					 unicode = (((*(uint8_t*)(a + index2)) & 0x0f) << 18)|(((*(uint8_t*)(a + index2+1)) & 0x3f) << 12) | (((*(uint8_t*)(a + index2 + 2)) & 0x3f) << 6) | ((*(uint8_t*)(a + index2 + 3)) & 0x3f);
					 len = sprintf(temp + index, "\\u%04x", unicode);
					 index2 += 4;
				 }
				 else {
					 printf("error 4");
					 break;
					 //errors
				 }
			 }
			 else {
				 printf("error 5");
				 break;
			 }

			 index += len;
		 }
		 strcpy(r, temp);
		 free(temp);
	 }



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值