/**
 * Converts a GBK-encoded plain-text word list into a UTF-8 encoded XML file.
 *
 * <p>Every non-blank input line becomes one {@code <sensitiveWord>} element,
 * numbered from 1 through its {@code id} attribute, inside a single
 * {@code <sensitiveWords>} root element.
 *
 * @author SUNBIN
 */
public class ConvertXML {

    /** Output location used by {@link #getXML(String)}. */
    private static final String DEFAULT_XML_PATH = "src/sensitive.xml";

    public static void main(String[] args) {
        getXML("敏感词库大全.txt");
    }

    /**
     * Converts the GBK text file at {@code path} into {@code src/sensitive.xml}.
     *
     * @param path path of the GBK-encoded input text file
     */
    public static void getXML(String path) {
        getXML(path, DEFAULT_XML_PATH);
    }

    /**
     * Converts the GBK text file at {@code path} into a UTF-8 XML file at
     * {@code xmlPath}, creating or overwriting the output file.
     *
     * <p>I/O failures are not propagated: the stack trace is printed to
     * standard error and the method returns, matching the historical
     * behaviour of {@link #getXML(String)}.
     *
     * @param path    path of the GBK-encoded input text file
     * @param xmlPath path of the UTF-8 XML file to write
     */
    public static void getXML(String path, String xmlPath) {
        // The input is opened first so that a missing input file does not
        // create or truncate the output file. try-with-resources closes (and
        // therefore flushes) every stream even when reading or writing fails.
        try (FileInputStream in = new FileInputStream(new File(path));
             FileOutputStream out = new FileOutputStream(new File(xmlPath));
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, "GBK"));
             BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"))) {

            writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); // XML declaration
            writer.write("<sensitiveWords>\n"); // root element

            int id = 1;
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue; // blank lines produce no element and consume no id
                }
                writer.write("\t<sensitiveWord id=\"" + (id++) + "\">");
                // Escape markup characters so the output stays well-formed XML.
                writer.write(escapeXml(line));
                writer.write("</sensitiveWord>\n");
            }

            writer.write("</sensitiveWords>"); // close the root element
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers both.
            e.printStackTrace();
        }
    }

    /** Replaces the characters that are illegal in XML element content with entity references. */
    private static String escapeXml(String text) {
        StringBuilder escaped = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }
}
Java 的字符串在内存中以 Unicode 形式存储:读取文件时指定 GBK 解码,文件中的 GBK 字节会被转换为 Unicode 字符;写出时指定 UTF-8 编码,这些 Unicode 字符再被转换为 UTF-8 字节。
同理,我们可以利用 Java 的这一特性,在各种不同的字符编码之间进行转换。