初识Java的字符编码格式。做了一下练习。
将字符串分别以GBK、UTF-8、ISO-8859-1三种编码格式输出到文件中。
环境:JRE:1.6.0_02 JVM的默认字符集:GBK
主要函数:
1)获取字符串的指定编码字节流:byte[] String.getBytes(String charSetName);
2)根据指定编码的字节数组构造字符串:new String(byte[] bytes, String charsetName);
由于ISO-8859-1编码不支持中文等非西欧字符,所以先将字符串转换成GBK字节数组,再把每个字节的十进制值用"%"连接成纯ASCII文本,最后以ISO-8859-1编码写入文件;读取时按相反的步骤还原。
package com.kevin.stringcoding.file;
/**
 * Contract for storing a string in a file and loading it back again.
 *
 * <p>The interface itself does not fix how characters are mapped to bytes;
 * that choice is left to the implementing class.
 *
 * @author xxx
 */
public interface IFileStream4String {

    /**
     * Loads the bytes stored in a file and returns them as a string.
     *
     * @param path location of the file to read
     * @return the text decoded from the file
     */
    String read4String(String path);

    /**
     * Stores a string in a file as bytes.
     *
     * @param path    location of the file to write
     * @param content text to store
     */
    void write4String(String path, String content);
}
package com.kevin.stringcoding.file;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
/**
 * Base class that reads and writes whole files as strings using one charset.
 *
 * <p>Subclasses choose the charset by implementing {@link #getCharsetName()}.
 *
 * @author xxx
 */
public abstract class ByteFileStream4String implements IFileStream4String {

    /** Size of the chunk used while draining the input stream. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Reads the entire file and decodes its bytes with {@link #getCharsetName()}.
     *
     * <p>Any exception is printed via {@code printStackTrace()} and reported to
     * the caller as an empty string.
     *
     * @param path file to read
     * @return the decoded content, or {@code ""} if reading or decoding failed
     */
    @Override
    public String read4String(String path) {
        FileInputStream in = null;
        try {
            in = new FileInputStream(path);
            // available() is only an estimate and a single read() may deliver
            // fewer bytes than requested, so keep reading until end of stream.
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] chunk = new byte[BUFFER_SIZE];
            int count;
            while ((count = in.read(chunk)) != -1) {
                collected.write(chunk, 0, count);
            }
            return new String(collected.toByteArray(), getCharsetName());
        } catch (Exception e) {
            e.printStackTrace();
            return "";
        } finally {
            // FileUtil is declared elsewhere in the project; it is handed the
            // stream even when opening failed (in == null), as before.
            FileUtil.closeInputStream(in);
        }
    }

    /**
     * Encodes the string with {@link #getCharsetName()} and writes the bytes
     * to the file.
     *
     * <p>Any exception is printed via {@code printStackTrace()} and otherwise
     * ignored.
     *
     * @param path    file to write
     * @param content text to encode and store
     */
    @Override
    public void write4String(String path, String content) {
        FileOutputStream out = null;
        try {
            out = new FileOutputStream(path);
            byte[] bytes = content.getBytes(getCharsetName());
            out.write(bytes);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // FileUtil is declared elsewhere in the project.
            FileUtil.closeOutputStream(out);
        }
    }

    /**
     * Names the charset used to convert between strings and file bytes.
     *
     * @return a charset name accepted by {@code String.getBytes(String)}
     */
    abstract String getCharsetName();
}
package com.kevin.stringcoding.file;
/**
 * File stream whose charset is UTF-8.
 *
 * @author xxx
 */
public class UTF8FileStream4String extends ByteFileStream4String {

    /** Charset name reported to the base class. */
    private static final String CHARSET_NAME = "UTF-8";

    /**
     * @return the charset name {@code "UTF-8"}
     */
    @Override
    String getCharsetName() {
        return CHARSET_NAME;
    }
}
package com.kevin.stringcoding.file;
/**
 * File stream whose charset is GBK.
 *
 * @author xxx
 */
public class GBKFileStream4String extends ByteFileStream4String {

    /** Charset name reported to the base class. */
    private static final String CHARSET_NAME = "GBK";

    /**
     * @return the charset name {@code "GBK"}
     */
    @Override
    String getCharsetName() {
        return CHARSET_NAME;
    }
}
package com.kevin.stringcoding.file;
/**
 * File stream that stores a string as ISO-8859-1 text derived from its GBK bytes.
 *
 * <p>ISO-8859-1 cannot represent Chinese and other non-Western characters, so
 * the string is first encoded as GBK. Each resulting byte is then written as
 * its signed decimal value, with the values separated by {@code "%"}; that
 * text is what gets stored using ISO-8859-1. Reading reverses these steps.
 *
 * @author xxx
 */
public class ISOFileStream4String extends ByteFileStream4String {

    /** Separator placed between the decimal byte values. */
    private static final String SEPARATOR = "%";

    /** Charset used to turn the string into bytes before they are written as text. */
    private static final String INTERMEDIATE_CHARSET = "GBK";

    /**
     * Reads the {@code "%"}-separated decimal byte values from the file and
     * decodes them as GBK.
     *
     * <p>Any exception (for example a token that is not a valid byte value) is
     * printed via {@code printStackTrace()} and reported as an empty string.
     *
     * @param path file to read
     * @return the decoded string, or {@code ""} if the file held no values or
     *         could not be decoded
     */
    @Override
    public String read4String(String path) {
        try {
            String encoded = super.read4String(path);
            if (encoded.isEmpty()) {
                // "".split("%") yields [""], which is not a parsable byte value;
                // an empty file simply stands for an empty string.
                return "";
            }
            String[] tokens = encoded.split(SEPARATOR);
            byte[] gbkBytes = new byte[tokens.length];
            for (int i = 0; i < tokens.length; ++i) {
                gbkBytes[i] = Byte.parseByte(tokens[i]);
            }
            return new String(gbkBytes, INTERMEDIATE_CHARSET);
        } catch (Exception e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Encodes the string as GBK and writes the bytes as {@code "%"}-separated
     * decimal values.
     *
     * <p>An empty string produces an empty payload. Any exception is printed
     * via {@code printStackTrace()} and otherwise ignored.
     *
     * @param path    file to write
     * @param content text to store
     */
    @Override
    public void write4String(String path, String content) {
        try {
            byte[] gbkBytes = content.getBytes(INTERMEDIATE_CHARSET);
            StringBuilder encoded = new StringBuilder();
            for (byte b : gbkBytes) {
                // Separator goes between values, so no trailing one has to be
                // trimmed afterwards (trimming failed for empty content).
                if (encoded.length() > 0) {
                    encoded.append(SEPARATOR);
                }
                encoded.append(b);
            }
            super.write4String(path, encoded.toString());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * @return the charset name {@code "ISO-8859-1"}
     */
    @Override
    String getCharsetName() {
        return "ISO-8859-1";
    }
}
测试代码
package com.kevin.stringcoding.file;
import static org.junit.Assert.assertEquals;
import org.junit.Test;
/**
 * Round-trip tests: every stream must read back exactly the text it wrote.
 */
public class TestCharSetDemo {
    IFileStream4String stream = null;
    String content = "Hello 中国";

    /** Round trip through the UTF-8 stream. */
    @Test
    public void testSave2UTF8() {
        stream = new UTF8FileStream4String();
        verifyRoundTrip("utf8.dat");
    }

    /** Round trip through the GBK stream. */
    @Test
    public void testSave2GBK() {
        stream = new GBKFileStream4String();
        verifyRoundTrip("gbk.dat");
    }

    /** Round trip through the ISO-8859-1 stream. */
    @Test
    public void testSave2ISO() {
        stream = new ISOFileStream4String();
        verifyRoundTrip("iso.dat");
    }

    /**
     * Writes {@link #content} with the current {@link #stream}, reads it back
     * from the same file and checks that both texts are equal.
     *
     * @param path file used for the round trip
     */
    private void verifyRoundTrip(String path) {
        stream.write4String(path, content);
        String restored = stream.read4String(path);
        assertEquals(content, restored);
    }
}