Android 学习笔记 (1)
Unicode 编码的 byte 数组转为 String
package com.xbfax.ibor.wxapi;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
/**
 * Small demo that encodes a string with the "UNICODE", "UTF-16LE" and
 * "UTF-16BE" charset names, prints the resulting bytes in hex, and then
 * decodes each byte array back into a string.
 */
public class Test {

    /**
     * Runs the demo and prints the results to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String s = "12345678";
        System.out.println(s);

        // The backslashes are escaped, so this prints the literal text
        // "\u0031\u0032..." rather than the digits themselves.
        String u = "\\u0031\\u0032\\u0033\\u0034\\u0035\\u0036\\u0037\\u0038";
        System.out.println(u);

        try {
            // "UNICODE" output carries two extra leading bytes (the BOM);
            // the explicit LE/BE variants do not.
            byte[] bytes1 = s.getBytes("UNICODE");
            byte[] bytes2 = s.getBytes("UTF-16LE");
            byte[] bytes3 = s.getBytes("UTF-16BE");

            System.out.println(Arrays.toString(toHexArr(bytes1)));
            System.out.println(Arrays.toString(toHexArr(bytes2)));
            System.out.println(Arrays.toString(toHexArr(bytes3)));

            // Decode with the same charset that produced the bytes. Using
            // new String(byte[]) would fall back to the platform default
            // charset and print garbage for UTF-16 data.
            System.out.println(new String(bytes1, "UNICODE"));
            System.out.println(new String(bytes2, "UTF-16LE"));
            System.out.println(new String(bytes3, "UTF-16BE"));
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }

    /**
     * Formats every byte as a two-digit, lower-case hex string prefixed
     * with "0x" (for example {@code 0x0a} or {@code 0xfe}).
     *
     * @param bytes the bytes to format; must not be null
     * @return a new array of the same length holding one hex string per byte
     */
    static String[] toHexArr(byte[] bytes) {
        String[] hexArr = new String[bytes.length];
        for (int i = 0; i < bytes.length; i++) {
            // Mask to 0..255 first: a negative byte would otherwise be
            // sign-extended to int and rendered as eight hex digits
            // (e.g. "fffffffe" instead of "fe").
            hexArr[i] = String.format("0x%02x", bytes[i] & 0xFF);
        }
        return hexArr;
    }
}
当直接使用 Unicode 作为字符集时,转换得到的 byte 数组会多出两个字节。原因是在 Java 中 Unicode 是 UTF-16 的别名,而使用 UTF-16 编码时,Java 默认按 UTF-16BE(大端)的方式拆分,并在开头加上两个字节的 BOM(Byte Order Mark,即 0xFE 0xFF)。所以,想去掉多出来的两个字节,可以用 UTF-16LE 或者 UTF-16BE 来代替直接使用 Unicode,这两种字符集在编码时不会写入 BOM。