#include <stdio.h>
#include <stdlib.h>
// Report whether `byte` can start a UTF-8 character.
// Continuation bytes have the bit pattern 10xxxxxx; every other byte value
// is treated as a start byte. Returns 1 for a start byte and 0 for a
// continuation byte.
int is_utf8_start_byte(unsigned char byte) {
    // Keep only the two most significant bits and compare against binary 10.
    const int top_two_bits = byte >> 6;
    return top_two_bits != 0x02;
}
// Return the total number of bytes (1-4) in the UTF-8 sequence introduced by
// the lead byte `byte`, or -1 if `byte` cannot begin a well-formed sequence.
//
// Bytes rejected with -1:
//   0x80..0xBF  continuation bytes, never a lead byte
//   0xC0, 0xC1  could only introduce overlong encodings of U+0000..U+007F
//   0xF5..0xFF  would encode code points above U+10FFFF, or match no UTF-8
//               lead-byte pattern at all
int utf8_byte_count(unsigned char byte) {
    if (byte < 0x80) {
        return 1; // 0xxxxxxx: single-byte (ASCII) character
    }
    if (byte >= 0xC2 && byte <= 0xDF) {
        return 2; // 110xxxxx, excluding the overlong leads 0xC0 and 0xC1
    }
    if (byte >= 0xE0 && byte <= 0xEF) {
        return 3; // 1110xxxx
    }
    if (byte >= 0xF0 && byte <= 0xF4) {
        return 4; // 11110xxx, capped at 0xF4 so the result stays <= U+10FFFF
    }
    return -1; // not a valid UTF-8 lead byte
}
// UTF-8转UTF-16
void utf8_to_utf16(const unsigned char* utf8, unsigned short** utf16, int* utf16_len) {
// 计算UTF-16字符串长度
int len = 0; // UTF-16字符个数
const unsigned char* p = utf8;
while (*p != '\0') {
if (is_utf8_start_byte(*p)) {
int byte_count = utf8_byte_count(*p);
if (byte_count < 0) {
printf("错误:非法UTF-8编码\n"