有关字符编码的记录

最新推荐文章于 2021-07-31 09:29:27 发布

dayday_up28

最新推荐文章于 2021-07-31 09:29:27 发布

阅读量465

点赞数

分类专栏： java学习笔记

本文链接：https://blog.csdn.net/dayday_up28/article/details/72835198

版权

java学习笔记专栏收录该内容

16 篇文章 0 订阅

订阅专栏

“中国”两个字的 Unicode 码依次是 4e2d（中）和 56fd（国），连在一起打印就是 4e2d56fd

对应的 GB2312 码依次是 d6d0（中）和 b9fa（国），连在一起打印就是 d6d0b9fa

以后遇到字符编码（乱码）之类的问题，可以先用“中国”这两个字试一下，对照上面的编码值来判断

/**
 * Demonstrates how the two-character string "中国" looks as UTF-16 code units
 * and as GB2312 bytes, and what happens when those bytes are written straight
 * to standard output.
 */
public class TestCharactorEncoding {
	/**
	 * Runs the five demonstration steps in order: dump the UTF-16 code units,
	 * dump the GB2312 bytes, write the raw bytes to {@code System.out}, list
	 * the system properties, then overwrite {@code file.encoding} and list
	 * the properties again.
	 *
	 * @param args ignored
	 * @throws Exception if the running JVM has no {@code gb2312} charset
	 */
	public static void main(String [] args)throws Exception
	{
		// Java strings are sequences of UTF-16 code units; a plain literal is
		// enough, wrapping it in new String(...) only makes a useless copy.
		String s = "中国";

		// 1. Print every code unit of the string as hex (expected: 4e2d56fd).
		System.out.println(toHex(s));

		// 2. Encode the string to GB2312 bytes and print them as hex
		//    (expected: d6d0b9fa). Calling getBytes() without an argument
		//    would use the platform default charset instead.
		byte[] buf = s.getBytes("gb2312");
		System.out.println(toHex(buf));

		// 3. Write the encoded bytes to standard output unchanged. They are
		//    only displayed as "中国" when the console itself decodes its
		//    input as GB2312 (or a superset of it); elsewhere they show up
		//    as garbage.
		System.out.write(buf, 0, buf.length);
		System.out.println();
		// For comparison: here PrintStream encodes the string itself.
		System.out.println("中国");

		// 4. List all system properties; file.encoding is among them.
		System.getProperties().list(System.out);

		// 5. Overwrite the file.encoding property and list everything again.
		//    NOTE: this only changes the value stored in the property table.
		//    The JVM determines its default charset during startup, so this
		//    call is not a way to switch the encoding used by later
		//    getBytes()/println() calls.
		System.setProperty("file.encoding", "iso8859-1");
		System.getProperties().list(System.out);
	}

	/** Returns the UTF-16 code units of {@code text} as four hex digits each. */
	private static String toHex(String text)
	{
		StringBuilder hex = new StringBuilder(text.length() * 4);
		for (int i = 0; i < text.length(); i++)
		{
			hex.append(String.format("%04x", (int) text.charAt(i)));
		}
		return hex.toString();
	}

	/** Returns {@code bytes} as two hex digits per byte. */
	private static String toHex(byte[] bytes)
	{
		StringBuilder hex = new StringBuilder(bytes.length * 2);
		for (int i = 0; i < bytes.length; i++)
		{
			// Mask to 0..255 first: a negative byte widened to int is
			// sign-extended and would otherwise print as ffffffXX.
			hex.append(String.format("%02x", bytes[i] & 0xFF));
		}
		return hex.toString();
	}
}

/**
 * Reads standard input byte by byte, echoes each byte as hex, and at the end
 * of every line decodes the collected bytes into a string and prints both the
 * string's UTF-16 code units and the string itself.
 */
public class TestDecode {
	/**
	 * Runs the read/echo/decode loop until standard input reaches end of
	 * stream.
	 *
	 * @param args ignored
	 * @throws Exception if reading from standard input fails
	 */
	public static void main(String [] args) throws Exception
	{
		// An earlier experiment tried System.setProperty("file.encoding",
		// "iso8859-1") here and found that Chinese text still printed fine.
		// NOTE(review): that fits the JVM fixing its default charset at
		// startup rather than re-reading the property — confirm against the
		// Charset.defaultCharset() documentation.

		System.out.println("Please input :");
		// Bytes of the line currently being typed. A growable buffer avoids
		// overflowing a fixed-size array on long lines.
		java.io.ByteArrayOutputStream pending = new java.io.ByteArrayOutputStream();
		while(true)
		{
			int ch = System.in.read();// one byte at a time, -1 at end of stream
			if (ch == -1)
			{
				// Without this check the loop would never end and -1 would
				// be stored into the buffer as the byte 0xFF.
				break;
			}
			// Echo the raw byte as delivered by the console; a Windows line
			// ending shows up as "d" followed by "a".
			System.out.println(Integer.toHexString(ch));
			switch (ch)
			{
			case '\r':
				// Carriage return carries no content; the '\n' that follows
				// ends the line.
				break;
			case '\n':
				printDecoded(pending.toByteArray());
				// Start the next line from scratch so earlier lines are not
				// decoded and printed again.
				pending.reset();
				break;
			default:
				pending.write(ch);
			}
		}
		// Input ended without a final newline: still show what was typed.
		if (pending.size() > 0)
		{
			printDecoded(pending.toByteArray());
		}
	}

	/**
	 * Decodes {@code raw} with the platform default charset, then prints each
	 * UTF-16 code unit of the result as hex followed by the decoded string.
	 */
	private static void printDecoded(byte[] raw)
	{
		String s = new String(raw, java.nio.charset.Charset.defaultCharset());
		for (int i = 0; i < s.length(); i++)
		{
			System.out.println(Integer.toHexString(s.charAt(i)));
		}
		System.out.println(s);
		// If the bytes had been decoded as ISO-8859-1 by mistake, the text
		// can be recovered with
		//     new String(s.getBytes("iso8859-1"), "gb2312")
		// because ISO-8859-1 maps every byte to exactly one char and back.
		// The opposite direction is lossy: encoding a string such as "中"
		// to ISO-8859-1 cannot keep the character, so it cannot be decoded
		// back afterwards.
	}
}