#include <stdio.h>
#include <stdlib.h>
/* Code point substituted for ill-formed UTF-16 input (unpaired surrogates). */
enum { UTF8_REPLACEMENT_CODE_POINT = 0xFFFD };

/*
 * Decode one Unicode code point from the NUL-terminated UTF-16 string `s`,
 * starting at index *pos, and advance *pos past the code units consumed.
 *
 * The caller must ensure s[*pos] is not the terminator. A high surrogate
 * followed by a low surrogate is combined into a supplementary code point;
 * any unpaired surrogate yields U+FFFD and consumes only that one unit, so
 * the terminator is never skipped.
 */
static unsigned int utf16_decode_next(const unsigned short *s, size_t *pos) {
    unsigned int unit = s[(*pos)++];

    if (unit >= 0xD800 && unit <= 0xDBFF) {
        /* Safe to peek: `unit` was non-zero, so the terminator is at or after *pos. */
        unsigned int low = s[*pos];
        if (low >= 0xDC00 && low <= 0xDFFF) {
            (*pos)++;
            return ((unit - 0xD800) << 10) + (low - 0xDC00) + 0x10000;
        }
        return UTF8_REPLACEMENT_CODE_POINT; /* high surrogate without its pair */
    }
    if (unit >= 0xDC00 && unit <= 0xDFFF) {
        return UTF8_REPLACEMENT_CODE_POINT; /* stray low surrogate */
    }
    return unit;
}

/* Number of UTF-8 bytes needed to encode `code_point` (1 to 4). */
static size_t utf8_encoded_length(unsigned int code_point) {
    if (code_point < 0x80) {
        return 1;
    }
    if (code_point < 0x800) {
        return 2;
    }
    if (code_point < 0x10000) {
        return 3;
    }
    return 4;
}

/* Write the UTF-8 encoding of `code_point` to `out`; return the byte count. */
static size_t utf8_encode(unsigned int code_point, unsigned char *out) {
    if (code_point < 0x80) {
        out[0] = (unsigned char)code_point;
        return 1;
    }
    if (code_point < 0x800) {
        out[0] = (unsigned char)(((code_point >> 6) & 0x1F) | 0xC0);
        out[1] = (unsigned char)((code_point & 0x3F) | 0x80);
        return 2;
    }
    if (code_point < 0x10000) {
        out[0] = (unsigned char)(((code_point >> 12) & 0x0F) | 0xE0);
        out[1] = (unsigned char)(((code_point >> 6) & 0x3F) | 0x80);
        out[2] = (unsigned char)((code_point & 0x3F) | 0x80);
        return 3;
    }
    out[0] = (unsigned char)(((code_point >> 18) & 0x07) | 0xF0);
    out[1] = (unsigned char)(((code_point >> 12) & 0x3F) | 0x80);
    out[2] = (unsigned char)(((code_point >> 6) & 0x3F) | 0x80);
    out[3] = (unsigned char)((code_point & 0x3F) | 0x80);
    return 4;
}

/*
 * Convert a NUL-terminated UTF-16 string to a newly allocated,
 * NUL-terminated UTF-8 string.
 *
 * Parameters:
 *   utf16_str - input code units, terminated by a 0 unit.
 *   utf8_str  - receives the malloc'ed result on success; the caller owns
 *               the buffer and must release it with free(). On failure it
 *               is set to NULL.
 *
 * Returns 1 on success, 0 on failure (NULL argument, size overflow, or
 * memory allocation failure).
 *
 * Unpaired surrogates cannot be represented in well-formed UTF-8, so each
 * one is replaced by U+FFFD instead of being encoded as-is.
 */
int utf16_to_utf8(const unsigned short* utf16_str, char** utf8_str) {
    if (utf8_str == NULL) {
        return 0;
    }
    *utf8_str = NULL;
    if (utf16_str == NULL) {
        return 0;
    }

    /* Pass 1: measure the output; start at 1 to reserve the trailing '\0'. */
    size_t needed = 1;
    size_t pos = 0;
    while (utf16_str[pos] != 0) {
        if (needed > (size_t)-1 - 4) {
            return 0; /* adding another code point could wrap the size */
        }
        needed += utf8_encoded_length(utf16_decode_next(utf16_str, &pos));
    }

    unsigned char *buffer = malloc(needed);
    if (buffer == NULL) {
        return 0; /* memory allocation failed */
    }

    /* Pass 2: decode again and emit the bytes. */
    size_t written = 0;
    pos = 0;
    while (utf16_str[pos] != 0) {
        written += utf8_encode(utf16_decode_next(utf16_str, &pos), buffer + written);
    }
    buffer[written] = '\0';

    *utf8_str = (char *)buffer;
    return 1; /* conversion succeeded */
}
/*
 * Demo entry point: converts a fixed UTF-16 sample string to UTF-8 and
 * prints it to standard output.
 *
 * Returns EXIT_SUCCESS after printing, or EXIT_FAILURE when the conversion
 * reports failure.
 */
int main(void) {
    /* UTF-16 encoded sample string, terminated by a 0 code unit */
    unsigned short utf16_str[] = {0x4F60, 0x597D, 0xFF0C, 0x4E16, 0x754C, 0xFF01, '\0'};
    char *utf8_str = NULL;

    if (!utf16_to_utf8(utf16_str, &utf8_str)) {
        return EXIT_FAILURE;
    }

    printf("UTF8编码的字符串:%s\n", utf8_str);

    /* The converter hands ownership of the malloc'ed buffer to the caller. */
    free(utf8_str);
    return EXIT_SUCCESS;
}