// 简体中文转繁体中文的工具,包括:1、编码转换(GBK -> Big5);2、语义转换(根据词库,需要词库的请通过 Email 联系我)。
package i18n.converter; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.util.Enumeration; import java.util.Hashtable; import java.util.Vector; /** * Author: zhusheng3@126.com */ public class Gbk2Big5Converter { protected Hashtable s2thash = new Hashtable(); static String[] dictFiles = new String[] { "mappings_gbk2big5_1-1.txt", "mappings_gbk2big5_phrase.txt" }; String dataline; public Gbk2Big5Converter() { s2thash = getHashDict(); } /* * 根据GBK的词典生成简转繁的对应关系,包括词组和单字 */ public Hashtable getHashDict() { Hashtable hashDict = new Hashtable(); BufferedReader br = null; for (String filename : dictFiles) { try { InputStream dictStream = getClass().getResourceAsStream( filename); br = new BufferedReader( new InputStreamReader(dictStream, "gbk")); String line = null; while ((line = br.readLine()) != null) { if (line.length() < 3 || line.charAt(0) == '#') { continue; } int idx = line.indexOf(","); if (idx > 0) { String src = line.substring(0, idx).trim(); String tgt = line.substring(idx + 1).trim(); if (hashDict.get(src) != null) { hashDict.remove(src); } hashDict.put(src, tgt); } } } catch (Exception ex) { ex.printStackTrace(); } finally { if (br != null) { try { br.close(); } catch (IOException e) { } } } } return hashDict; } /* * 利用词典对一个字符串进行替换 */ public String convertString(String inline) { StringBuffer outline = new StringBuffer(inline); convertStringBuffer(outline); return outline.toString(); } /* * 利用词典对一个StringBuffer进行替换 */ public void convertStringBuffer(StringBuffer dataline) { String lin = dataline.toString(); // System.out.println("before:" + lin); int startPostion = 0; String currchar; char charvalue; for (int beginChar = startPostion; beginChar <= dataline.length(); beginChar++) { String newStr 
= ""; // System.out.println("开始位置beginChar:" + beginChar); // 在该位置下的子串最长度 int maxLengthOfSubstr = dataline.length() - beginChar; // System.out.println("在该位置下的子串最大长度:" + maxLengthOfSubstr); // 找出所有子串 for (int currentLen = maxLengthOfSubstr; currentLen >= 1; currentLen--) { // 英文字符不用匹配,直接跳出 if (isSingleByte(dataline.substring(beginChar, beginChar + 1))) { // System.out.println(inputString.substring(beginChar,beginChar+1)); break; } // 取得当前子串 if (beginChar + currentLen <= dataline.length()) { // 当前子串 String subStr = dataline.substring(beginChar, beginChar + currentLen); // System.out.println("当前子串:" + subStr); if (s2thash.get(subStr) != null) { // System.out.println("找到匹配:" + subStr + "->"+ // s2thash.get(subStr)); newStr = s2thash.get(subStr).toString(); dataline.replace(beginChar, beginChar + currentLen, s2thash.get(subStr).toString()); String after = dataline.toString(); // System.out.println("本次替换后的字符串:" + after); if (beginChar + newStr.length() < dataline.length()) { // System.out.println("替换完成后开始字符:"+ // dataline.charAt(beginChar+ newStr.length())); } else { // System.out.println("本字符串没有新字符可以替换了!"); } beginChar = beginChar + newStr.length() - 1; // 找到匹配后,就不用继续往下找本起始字符下的更短的字符串了 // System.out.println("找到匹配后,就不用继续往下找本起始字符下的更短的字符串了"); break; } } } if (beginChar >= dataline.length()) break; } // System.out.println("after:" + dataline.toString()); } /* * 把目标文件或者文件夹(sourcedir,gbk编码)转成big5编码, 并另存为目标文件夹(targetdir,big5编码) */ public void convertFile(String sourcedir, String targetdir) { int source_encoding = 0; int target_encoding = 4; BufferedReader srcbuffer; BufferedWriter outbuffer; String dataline; Vector inputfiles = new Vector(); Vector outputfiles = new Vector(); inputfiles.add(sourcedir); outputfiles.add(targetdir); int i, j, working_encoding; File tmpfile, tmpout; String dirfiles[]; for (i = 0; i < inputfiles.size(); i++) { tmpfile = new File((String) inputfiles.get(i)); if (tmpfile.exists() == false) { System.out.println("ERROR: Source file " + (String) 
inputfiles.get(i) + " does not exist./n"); continue; } if (tmpfile.isDirectory() == true) { tmpout = new File((String) outputfiles.get(i)); if (tmpout.exists() == false) { tmpout.mkdir(); } dirfiles = tmpfile.list(); if (dirfiles != null) { for (j = 0; j < dirfiles.length; j++) { inputfiles.add((String) inputfiles.get(i) + File.separator + dirfiles[j]); outputfiles.add((String) outputfiles.get(i) + File.separator + dirfiles[j]); } } continue; } System.out.println("Converting " + inputfiles.get(i) + " to " + outputfiles.get(i) + " with encoding " + source_encoding); try { working_encoding = source_encoding; srcbuffer = new BufferedReader(new InputStreamReader( new FileInputStream((String) inputfiles.get(i)), "gbk")); outbuffer = new BufferedWriter(new OutputStreamWriter( new FileOutputStream((String) outputfiles.get(i)), "big5")); while ((dataline = srcbuffer.readLine()) != null) { outbuffer.write(convertString(dataline)); outbuffer.newLine(); } srcbuffer.close(); outbuffer.close(); } catch (Exception ex) { System.err.println(ex); } } } public File convertSimpleString(String inputString) { // System.out.println("before->inputString:"+inputString); byte[] bytes = inputString.getBytes(); StringBuffer sb = new StringBuffer(); // write the string to a temp file File result = new File("temp.txt"); try { InputStream inputStream = new ByteArrayInputStream(inputString .getBytes()); BufferedReader srcbuffer = new BufferedReader( new InputStreamReader(inputStream, "gbk")); BufferedWriter outbuffer = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(result), "big5")); while ((dataline = srcbuffer.readLine()) != null) { outbuffer.write(convertString(dataline)); outbuffer.newLine(); } srcbuffer.close(); outbuffer.close(); } catch (Exception e) { e.printStackTrace(); } // System.out.println("after->result:"+result); return result; } /* * 判断是否单字节字,中文都不是单字节字 */ public static boolean isSingleByte(String inStr) { if (inStr.getBytes().length == inStr.length()) { return 
true; } else { return false; } } public void printDict() { Gbk2Big5Converter aConverter = new Gbk2Big5Converter(); for (int i = 20; i > 0; i--) { Enumeration enums = aConverter.s2thash.keys(); while (enums.hasMoreElements()) { String ele = (String) enums.nextElement(); if (ele.length() == i) { System.out.print(ele); System.out.println("," + aConverter.s2thash.get(ele)); } } } } public static void main(String[] args) { Gbk2Big5Converter aGbk2Big5Converter = new Gbk2Big5Converter(); String src = "src//resource_zh_CN.properties.org"; String tgt = "src//resource_zh_TW.properties.org"; System.out.println(new File(src).getAbsolutePath()); aGbk2Big5Converter.convertFile(src, tgt); } }