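// The three methods below demonstrate, in turn: the result of decoding with the wrong charset, the platform's (GBK) encoding, and how the JVM itself stores characters.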
/**
* java中,字符串在jvm中的存储永远是用unicode存储的,
* 但只要是为人所能看到的地方,如:网页、数据库、编码区、控制台等,都会有相应的编码方式,
* 要想正确的编码、解码就必须显式地指明相应的编码或解码方式
* @author Administrator
*
*/
import java.io.*;
/**
 * Console demos showing where character encodings matter in Java.
 *
 * <p>A {@code String} always holds UTF-16 code units in memory. A charset only comes into
 * play when text is converted to or from bytes (console, files, network), and the result
 * is only correct when the charset used for decoding matches the one used for encoding.
 *
 * <p>Several demos rely on the platform default charset. The sample values quoted in the
 * comments assume a GBK/GB2312 default (a Chinese-locale Windows machine); on other
 * platforms the default-charset output differs.
 */
public class JavaEncoded {

    /**
     * Runs one of the demos.
     *
     * @param args ignored
     * @throws Exception if the selected demo fails
     */
    public static void main(String[] args) throws Exception {
        // Alternative demos: swap the call below to run one of these instead.
        // isoEncode();
        // encodeLenght();
        jvmEncode();
    }

    /**
     * Reads one line from standard input, deliberately decoding it as ISO-8859-1, and prints
     * three variants of it to show which re-encoding recovers the text that was typed.
     *
     * <p>Does nothing if standard input is already at end-of-stream. Standard input is
     * closed when this method returns.
     *
     * @throws IOException if reading from or closing standard input fails
     */
    public static void isoEncode() throws IOException {
        String strLine;
        // Closing the BufferedReader also closes the wrapped InputStreamReader and System.in,
        // and try-with-resources guarantees that even when readLine() throws.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(System.in, "iso8859-1"))) {
            strLine = br.readLine();
        }
        if (strLine == null) {
            // readLine() returns null at end-of-stream; there is no text to demonstrate with.
            return;
        }

        // 1. The raw, wrongly decoded string: every input byte became one char in
        //    U+0000..U+00FF, so non-ASCII input shows up garbled.
        System.out.println(strLine);

        // 2. WRONG: encodes with the platform default charset and decodes as ISO-8859-1.
        //    Chars that the default charset cannot represent are replaced during encoding,
        //    so the bytes that were originally typed are lost.
        System.out.println(new String(strLine.getBytes(), "iso8859-1"));

        // 3. RIGHT: encoding with ISO-8859-1 restores exactly the bytes that were read;
        //    decoding them with the platform default charset yields the typed text
        //    (assuming the console uses the platform default charset).
        System.out.println(new String(strLine.getBytes("iso8859-1")));
    }

    /**
     * Prints, for the Chinese character U+6211 and for {@code 'A'}, the encoded length and
     * the signed byte values under both the platform default charset and GBK.
     *
     * <p>In GBK a Chinese character takes two bytes and an ASCII letter takes one. U+6211 is
     * encoded as 0xCE 0xD2; because {@code byte} is signed, those values print as -50 and
     * -46 (0xCE - 256 and 0xD2 - 256).
     *
     * <p>The method name keeps its historical spelling so that existing callers still compile.
     *
     * @throws UnsupportedEncodingException if the GBK charset is not available in this JVM
     */
    public static void encodeLenght() throws UnsupportedEncodingException {
        System.out.println();
        printEncodings(String.valueOf('\u6211'));
        printEncodings(String.valueOf('A'));
    }

    /** Prints the byte count, then the byte values, of {@code text} in the default charset and in GBK. */
    private static void printEncodings(String text) throws UnsupportedEncodingException {
        byte[] defaultBytes = text.getBytes();
        System.out.println(defaultBytes.length);
        byte[] gbkBytes = text.getBytes("GBK");
        System.out.println(gbkBytes.length);
        printBytes(defaultBytes);
        printBytes(gbkBytes);
    }

    /** Prints each byte as a signed decimal followed by a space, then ends the line. */
    private static void printBytes(byte[] bytes) {
        for (byte b : bytes) {
            System.out.print(b + " ");
        }
        System.out.println();
    }

    /**
     * Shows that a {@code char} is just its UTF-16 code unit: prints the code of the
     * character U+6C38 in hex, then prints the character rebuilt from that number.
     *
     * <p>Expected output: {@code 6c38} on the first line and the character U+6C38 on the
     * second (rendered correctly only if the console can display it).
     */
    public static void jvmEncode() {
        // Written as a Unicode escape so that compilation does not depend on the
        // encoding of this source file.
        char han = '\u6c38';
        // Widen to int: a cast to short would turn chars at or above U+8000 negative.
        System.out.format("%x\n", (int) han);
        // The same character built from its numeric code, with no character literal involved.
        char han2 = 0x6c38;
        System.out.println(han2);
    }
}
//~