字符转换测试:
package a;
/**
 * Small console demo that re-decodes the bytes of a string containing Chinese
 * characters with different charsets and prints the results.
 *
 * <p>Every {@code getBytes()} call here is made without a charset argument, so
 * the bytes are produced with the platform default charset and the printed
 * output depends on the machine the demo runs on.
 *
 * <p>{@code ChangeCharset.UTF_8} and {@code ChangeCharset.GBK} are declared
 * outside this class; presumably they name the UTF-8 and GBK charsets
 * (TODO confirm against {@code ChangeCharset}).
 */
public class TestCharset {

    /**
     * Builds the re-decoded variants of the sample string and prints each one
     * twice: first with its bytes as a hex string, then with its bytes as
     * signed decimal values.
     *
     * @param args unused
     * @throws Exception if one of the string constructors rejects the charset
     */
    public static void main(String[] args) throws Exception {
        String str = "中国renmin";
        // Default-charset bytes decoded as UTF-8, then that result decoded again as GBK.
        String str1 = new String(str.getBytes(), ChangeCharset.UTF_8);
        String str3 = new String(str1.getBytes(), ChangeCharset.GBK);
        // Default-charset bytes decoded as GBK, then that result decoded again as UTF-8.
        String str2 = new String(str.getBytes(), ChangeCharset.GBK);
        String str4 = new String(str2.getBytes(), ChangeCharset.UTF_8);
        // Plain copy of str4, and str4's bytes decoded as GBK once more.
        String str5 = new String(str4);
        String str6 = new String(str4.getBytes(), ChangeCharset.GBK);

        System.out.println(str + " str " + bytesToHexString(str));
        System.out.println(str1 + " str utf8 " + bytesToHexString(str1));
        System.out.println(str3 + " str utf8 gbk " + bytesToHexString(str3));
        System.out.println(str2 + " str gbk " + bytesToHexString(str2));
        System.out.println(str4 + " str gbk utf8 " + bytesToHexString(str4));
        System.out.println(str5 + " new (str gbk utf8) " + bytesToHexString(str5));
        System.out.println(str6 + " new (str gbk utf8) gbk " + bytesToHexString(str6));

        prt(str + " str ", str.getBytes());
        prt(str1 + " str utf8 ", str1.getBytes());
        prt(str3 + " str utf8 gbk ", str3.getBytes());
        prt(str2 + " str gbk ", str2.getBytes());
        prt(str4 + " str gbk utf8 ", str4.getBytes());
        prt(str5 + " new (str gbk utf8) ", str5.getBytes());
        // Fixed: this line is labelled with str6 but used to dump str5's bytes.
        prt(str6 + " new (str gbk utf8) gbk ", str6.getBytes());
    }

    /**
     * Returns the bytes of {@code str} (platform default charset) as a
     * lowercase hex string, two digits per byte.
     *
     * @param str the string whose bytes are rendered
     * @return the hex string; empty when {@code str} has no bytes
     */
    private static String bytesToHexString(String str) {
        byte[] src = str.getBytes();
        StringBuilder hex = new StringBuilder(src.length * 2);
        for (byte element : src) {
            // Mask to 0..255 so negative bytes do not sign-extend to eight hex digits.
            int v = element & 0xFF;
            if (v < 0x10) {
                hex.append('0');
            }
            hex.append(Integer.toHexString(v));
        }
        return hex.toString();
    }

    /**
     * Prints a line break, a space and {@code str}, followed by every byte of
     * {@code bs} as a signed decimal value with a trailing space.
     *
     * @param str label printed in front of the byte values
     * @param bs  bytes to print
     */
    private static void prt(String str, byte[] bs) {
        System.out.print("\n " + str);
        for (byte b : bs) {
            System.out.print(b + " ");
        }
    }
}
结果:
中国renmin str d6d0b9fa72656e6d696e
?й?renmin str utf8 3fa7db3f72656e6d696e
?й?renmin str utf8 gbk 3fa7db3f72656e6d696e
中国renmin str gbk d6d0b9fa72656e6d696e
?й?renmin str gbk utf8 3fa7db3f72656e6d696e
?й?renmin new (str gbk utf8) 3fa7db3f72656e6d696e
?й?renmin new (str gbk utf8) gbk 3fa7db3f72656e6d696e
中国renmin str -42 -48 -71 -6 114 101 110 109 105 110
?й?renmin str utf8 63 -89 -37 63 114 101 110 109 105 110
?й?renmin str utf8 gbk 63 -89 -37 63 114 101 110 109 105 110
中国renmin str gbk -42 -48 -71 -6 114 101 110 109 105 110
?й?renmin str gbk utf8 63 -89 -37 63 114 101 110 109 105 110
?й?renmin new (str gbk utf8) 63 -89 -37 63 114 101 110 109 105 110
?й?renmin new (str gbk utf8) gbk 63 -89 -37 63 114 101 110 109 105 110
结论:
1、带有中文的String调用不带参数的getBytes()时使用平台默认编码,本测试环境下为GBK
2、由GBK转为UTF-8成功(由正确编码转为错误编码成功)
3、由UTF-8转为GBK失败(由错误编码转回正确编码失败):按UTF-8解码GBK字节时,无法识别的字节已被替换(结果中的3f,即"?"),原始字节丢失,因此无法还原
4、汉字内容编码格式转换后显示异常
欢迎讨论