Java 8 字符串使用 UTF-16
/**
 * Demonstrates how Java's UTF-16 strings represent the supplementary code
 * point U+26B99 and how to walk a string one code point at a time.
 */
public class Hex {

    /**
     * Prints the code point / surrogate pair conversion demo, then the
     * string traversal demo.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        printSurrogateConversions();
        printCodePointsOfSample();
    }

    /** Converts between a code point and its surrogate pair, printing every result. */
    private static void printSurrogateConversions() {
        final int supplementary = 0x26b99;

        // The same one-character string, built from a code point array and
        // from an escaped surrogate pair.
        String fromCodePoints = new String(new int[]{supplementary}, 0, 1);
        System.out.println(fromCodePoints);
        System.out.println("\uD85A\uDF99");

        // Split the code point into its two UTF-16 code units.
        char[] pair = Character.toChars(supplementary);
        char high = pair[0];
        char low = pair[1];
        String hexPair = String.format("%#x %#x", (int) high, (int) low);
        System.out.println(hexPair); // 0xd85a 0xdf99
        System.out.println(new String(pair)); // same character as above

        System.out.println(Character.isHighSurrogate(high)); // true
        System.out.println(Character.isLowSurrogate(low)); // true

        // Recombine the pair, first from the two chars, then from the array.
        int recombined = Character.toCodePoint(high, low);
        System.out.println(Integer.toHexString(recombined)); // 26b99
        int readFromArray = Character.codePointAt(pair, 0);
        System.out.println(Integer.toHexString(readFromArray)); // 26b99

        // Classification of the code point itself.
        System.out.println(Character.isSupplementaryCodePoint(supplementary)); // true
        System.out.println(Character.isBmpCodePoint(supplementary)); // false
        System.out.println(Character.isValidCodePoint(supplementary)); // true
        System.out.println(Character.charCount(supplementary)); // 2
    }

    /** Prints each code point of a sample string in hex, one per line. */
    private static void printCodePointsOfSample() {
        String sample = "你好\uD85A\uDF99你好";
        int index = 0;
        while (index < sample.length()) {
            int current = sample.codePointAt(index);
            // Advance by 1 or 2 chars depending on whether this was a surrogate pair.
            index += Character.charCount(current);
            System.out.println(Integer.toHexString(current));
        }
    }
}
Unicode 转 UTF-8
bytes | bits | representation |
---|---|---|
1 | 7 | 0vvvvvvv |
2 | 11 | 110vvvvv 10vvvvvv |
3 | 16 | 1110vvvv 10vvvvvv 10vvvvvv |
4 | 21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv |
Unicode 转 UTF-16
如果U<0x10000,U的UTF-16编码就是U对应的16位无符号整数(为书写简便,下文将16位无符号整数记作WORD)。
如果U≥0x10000,我们先计算U'=U-0x10000(由于U≤0x10FFFF,U'最多只有20位),然后将U'写成20位二进制形式:yyyy yyyy yyxx xxxx xxxx,U的UTF-16编码(二进制)就是两个WORD:110110yyyyyyyyyy 110111xxxxxxxxxx。
/*
 * Illustrative sketch: split a code point c above 0xFFFF into a UTF-16
 * surrogate pair and pass each half back through putwchar.
 * Only the supplementary-plane branch is shown; what happens for a
 * value that already fits in 16 bits is not part of this excerpt.
 */
putwchar(c)
{
    if (c > 0xFFFF) {
        /* High surrogate. 0xD7C0 == 0xD800 - (0x10000 >> 10), so the
           "subtract 0x10000" step is folded into the constant. */
        putwchar (0xD7C0 + (c >> 10));
        /* Low surrogate. The low 10 bits are unaffected by subtracting
           0x10000, so they can be taken from c directly. */
        putwchar (0xDC00 | (c & 0x3FF));
    }
    /* c <= 0xFFFF: emitting the single 16-bit unit is not shown here. */
}
参考链接
https://www.zhihu.com/question/308677093/answer/1033468482
https://www.ibm.com/developerworks/library/j-unicode/j-unicode-pdf.pdf
https://www.jianshu.com/p/235ad9c63cf2
https://www.cnblogs.com/dragon2012/p/5020259.html
https://www.utf8-chartable.de/
https://icu-project.org/docs/papers/forms_of_unicode/
https://www.zhihu.com/question/27562173/answer/37188642