1.简介
通过程序证明 Java 的 char 在内部以 UTF-16 编码（每个 char 保存一个 UTF-16 码元），而与 Charset.defaultCharset() 的取值无关。
2.程序
package com.siyuan.jdk.test;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Arrays;
/**
 * Compares the bits of each {@code char} in a string with the bytes obtained by
 * encoding the same string as UTF-16, and prints whether they match. The
 * platform default charset is printed as well so the two can be contrasted.
 */
public class CharCodeTest {

    /**
     * Number of leading bytes occupied by the byte-order mark that
     * {@code String.getBytes("UTF-16")} emits before the encoded text.
     */
    private static final int BOM_LENGTH = 2;

    /**
     * Converts a byte to its 8-character binary string representation.
     *
     * @param src the byte value to render
     * @return exactly eight characters, each {@code '0'} or {@code '1'}, most
     *         significant bit first; negative values are shown in their
     *         two's-complement bit pattern (for example {@code -2} gives
     *         {@code "11111110"})
     */
    public static String byteToBinary(byte src) {
        StringBuilder result = new StringBuilder(Byte.SIZE);
        // Walk from the most significant bit down so no reverse() is needed.
        // Masking with 1 after the shift makes sign extension of the promoted
        // int irrelevant, so negative bytes need no special handling.
        for (int bit = Byte.SIZE - 1; bit >= 0; bit--) {
            result.append(((src >> bit) & 1) == 0 ? '0' : '1');
        }
        return result.toString();
    }

    /**
     * Prints the default charset, the UTF-16 bytes of a sample string, and for
     * every char: the char itself, its 16 bits taken directly from the char,
     * the 16 bits taken from the matching pair of UTF-16 bytes, and whether
     * the two bit strings are equal.
     *
     * @param args unused
     * @throws UnsupportedEncodingException if the "UTF-16" charset name is not supported
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String str = "I AM 中国人";
        System.out.println(Charset.defaultCharset());
        byte[] utf16 = str.getBytes("UTF-16");
        System.out.println(Arrays.toString(utf16));
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            System.out.println(c);

            // Bits read straight from the in-memory char: high byte, then low byte.
            String fromChar = byteToBinary((byte) (c >>> 8)) + byteToBinary((byte) c);
            System.out.println(fromChar);

            // Bits read from the encoded array: skip the BOM, two bytes per char.
            int offset = BOM_LENGTH + 2 * i;
            String fromBytes = byteToBinary(utf16[offset]) + byteToBinary(utf16[offset + 1]);
            System.out.println(fromBytes);

            System.out.println(fromChar.equals(fromBytes));
        }
    }
}
3.运行结果
GBK
[-2, -1, 0, 73, 0, 32, 0, 65, 0, 77, 0, 32, 78, 45, 86, -3, 78, -70]
I
0000000001001001
0000000001001001
true
0000000000100000
0000000000100000
true
A
0000000001000001
0000000001000001
true
M
0000000001001101
0000000001001101
true
0000000000100000
0000000000100000
true
中
0100111000101101
0100111000101101
true
国
0101011011111101
0101011011111101
true
人
0100111010111010
0100111010111010
true