Java 字符编码实验

 

 

 

 

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

public class Test {

	/**
	 * Round-trips a sample string through every charset returned by
	 * {@link Charset#availableCharsets()} and reports which ones reproduce it exactly.
	 *
	 * <p>For each charset, one line is printed: either the charset name followed by the
	 * round-tripped text, or the charset name followed by {@code " UnsupportedEncoding"}
	 * when the charset cannot encode. Finally the list of charset names whose round trip
	 * equals the sample string is printed.
	 *
	 * @param args unused
	 * @throws IOException declared for signature compatibility; nothing in this method throws it
	 */
	public static void main(String[] args) throws IOException {
		String str = "Hello台灣-台湾-중국어123-!@#-,./";
		// Alternative sample without the Korean text:
		//String str = "Hello台灣-台湾123-!@#-,./";
		List<String> supportChinaCharsetList = new ArrayList<String>();
		// values() of the sorted map iterates in the same order as its key set,
		// so the output order is unchanged.
		for (Charset charset : Charset.availableCharsets().values()) {
			String charSet = charset.name();
			String result = roundTrip(str, charset);
			if (result == null) {
				System.out.println(charSet + " UnsupportedEncoding");
				continue;
			}
			System.out.println(charSet+":   " + result);
			if (result.equals(str)) {
				supportChinaCharsetList.add(charSet);
			}
		}

		System.out.println(supportChinaCharsetList);

	}

	/**
	 * Encodes {@code text} with {@code charset} and decodes the resulting bytes with the
	 * same charset.
	 *
	 * @param text    the string to round-trip
	 * @param charset the charset used for both encoding and decoding
	 * @return the decoded string, or {@code null} when the charset does not support encoding
	 */
	static String roundTrip(String text, Charset charset) {
		// Ask the charset up front instead of relying on an exception for control flow.
		if (!charset.canEncode()) {
			return null;
		}
		try {
			return new String(text.getBytes(charset), charset);
		} catch (UnsupportedOperationException e) {
			// Defensive: treat a charset that still refuses to encode like a decode-only one.
			return null;
		}
	}

	/**
	 * Prints the signed decimal value of every byte back-to-back (no separator),
	 * followed by a line break.
	 *
	 * @param bytes the bytes to print
	 */
	static void print(byte[] bytes) {
		for (byte b : bytes) {
			System.out.print(b);
		}
		System.out.println();
	}

}

 运行结果如下(注意:以下输出来自该程序的另一个版本——它使用上面被注释掉的测试字符串,并额外打印了 getBytes 得到的字节值,因此格式与上面的代码不完全一致):

String : Hello台灣-台湾123-!@#-,./ getBytes By Big5 is :72101108108111-91120-588745-91120634950514533643545444647

Hello台灣-台?123-!@#-,./

String : Hello台灣-台湾123-!@#-,./ getBytes By Big5-HKSCS is :72101108108111-91120-588745-91120634950514533643545444647

Hello台灣-台?123-!@#-,./

String : Hello台灣-台湾123-!@#-,./ getBytes By EUC-JP is :72101108108111-62-26-33-4445-62-26-49-474950514533643545444647

Hello台灣-台湾123-!@#-,./

.

.

.

.

String : Hello台灣-台湾123-!@#-,./ getBytes By x-windows-50221 is :72101108108111273666661029584274066452736666610279812740664950514533643545444647

Hello台灣-台湾123-!@#-,./

String : Hello台灣-台湾123-!@#-,./ getBytes By x-windows-874 is :7210110810811163634563634950514533643545444647

Hello??-??123-!@#-,./

String : Hello台灣-台湾123-!@#-,./ getBytes By x-windows-949 is :72101108108111-9-69-40-6745-9-69634950514533643545444647

Hello台灣-台?123-!@#-,./

String : Hello台灣-台湾123-!@#-,./ getBytes By x-windows-950 is :72101108108111-91120-588745-91120634950514533643545444647

Hello台灣-台?123-!@#-,./

String : Hello台灣-台湾123-!@#-,./ getBytes By x-windows-iso2022jp is :72101108108111273666661029584274066452736666610279812740664950514533643545444647

Hello台灣-台湾123-!@#-,./

 

支持中文的编码集合:

[EUC-JP, GB18030, GBK, ISO-2022-JP, Shift_JIS, UTF-16, UTF-16BE, UTF-16LE, UTF-8, windows-31j, x-euc-jp-linux, x-EUC-TW, x-eucJP-Open, x-IBM33722, x-IBM930, x-IBM939, x-IBM942, x-IBM942C, x-IBM943, x-IBM943C, x-mswin-936, x-PCK, x-windows-50220, x-windows-50221, x-windows-iso2022jp]


 

 

 

再看下面这个例子。如果大家能把它弄清楚,编码问题应该就不再是问题了。

import java.io.UnsupportedEncodingException;

public class A {
	/**
	 * Runs a fixed list of charset experiments through {@link #transfer}.
	 *
	 * <p>The comment above each row is the output the author recorded for that
	 * experiment: four byte dumps followed by the final text.
	 */
	public static void main(String[] args) throws UnsupportedEncodingException {
		// Each row is {text, set1, set2}.
		final String[][] experiments = {
			// Recorded: all four dumps "-52 -88 -51 -27", final text 台湾
			{"台湾", "gb2312", "ISO-8859-1"},

			// Recorded: all four dumps "-52 -88 63", final text 台?
			{"台灣", "gb2312", "ISO-8859-1"},

			// Recorded: all four dumps "-52 -88 -98 -77", final text 台灣
			{"台灣", "gbk", "ISO-8859-1"},

			// Recorded: all four dumps "63 63", final text ??
			{"台灣", "ISO-8859-1", "gbk"},

			// Recorded: all four dumps "-27 -113 -80 -25 -127 -93", final text 台灣
			{"台灣", "utf-8", "ISO-8859-1"},

			// Recorded: all four dumps "63 63", final text ??
			{"台灣", "ISO-8859-1", "utf-8"},

			// Recorded: all four dumps "-27 -113 -80 -25 -127 -93", final text 台灣
			{"台灣", "utf-8", "gbk"},

			// Recorded: first dump "-52 -88 -98 -77", the other three
			// "-52 -88 -17 -65 -67 -17 -65 -67", final text 台锟斤拷
			{"台灣", "gbk", "utf-8"},
		};

		for (String[] experiment : experiments) {
			transfer(experiment[0], experiment[1], experiment[2]);
		}
	}

	/**
	 * Encodes {@code str} with {@code set1}, decodes those bytes with {@code set2},
	 * then reverses the trip, printing the bytes at each stage.
	 *
	 * <p>Output, in order: the {@code set1} bytes of {@code str}; the {@code set2} bytes
	 * of the intermediate string; those {@code set2} bytes again; the {@code set1} bytes
	 * of the restored string; the restored string; two blank lines.
	 *
	 * @param str  text to convert
	 * @param set1 charset name used to encode the text and to decode it on the way back
	 * @param set2 charset name used to read the encoded bytes as an intermediate string
	 * @throws UnsupportedEncodingException if either charset name is not supported
	 */
	static void transfer(String str, String set1, String set2) throws UnsupportedEncodingException {
		// Forward leg: str -> bytes (set1) -> intermediate string (set2).
		final byte[] sourceBytes = str.getBytes(set1);
		final String intermediate = new String(sourceBytes, set2);
		print(sourceBytes);

		final byte[] intermediateBytes = intermediate.getBytes(set2);
		print(intermediateBytes);

		// Return leg: intermediate string -> bytes (set2) -> restored string (set1).
		final String restored = new String(intermediateBytes, set1);
		print(intermediateBytes);
		print(restored.getBytes(set1));
		System.out.println(restored);

		System.out.println();
		System.out.println();
	}

	/**
	 * Prints each byte as a signed decimal followed by a space, then ends the line.
	 *
	 * @param bys bytes to print
	 */
	static void print(byte[] bys) {
		final StringBuilder line = new StringBuilder();
		for (byte value : bys) {
			line.append(value).append(' ');
		}
		System.out.println(line.toString());
	}

}
 

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值