最近在做国际化,需要把代码里的中文提取出来。
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Walks a directory tree and extracts quoted Chinese text from {@code .java} files.
 *
 * <p>For every qualifying source line, one record per extracted text segment is written to the
 * supplied {@link PrintWriter} in the form
 * {@code fileName@@@lineNumber@@@trimmedLine@@@package_class_NNNN@@@text}, terminated by a
 * newline. {@code NNNN} is a zero-padded counter of qualifying lines within the file.
 */
public class Process {

    /** Matches one character in the U+4E00..U+9FA5 range (Chinese ideographs). */
    private static final Pattern CHINESE = Pattern.compile("[\u4e00-\u9fa5]");

    /** Path fragment after which the next path segment is taken as the package name. */
    private static final String PROJECT_MARKER = "DQMS";

    /** Only files whose name ends with this suffix are scanned. */
    private static final String JAVA_SUFFIX = ".java";

    /** Lines containing any of these fragments are skipped (comments, annotations, logging). */
    private static final String[] SKIP_MARKERS = {
        "*", "//", "@ResourcePermissions", "@OptLogAnnotation", "logger.", "<!--"
    };

    private final PrintWriter output;

    /** Encoding used to read source files; should match the encoding configured in the IDE. */
    private final String charset;

    /**
     * @param output  sink that receives the extracted records
     * @param charset name of the encoding used to read source files; {@code null} means UTF-8
     */
    public Process(PrintWriter output, String charset) {
        this.output = output;
        this.charset = charset != null ? charset : "UTF-8";
    }

    /**
     * Splits a line on double quotes and returns the segments that contain Chinese characters.
     *
     * @param names the line to inspect; {@code null} yields an empty list
     * @return the segments containing at least one Chinese character, in order of appearance
     */
    public List<String> getWord(String names) {
        List<String> word = new ArrayList<>();
        if (names == null) {
            return word;
        }
        for (String segment : names.split("\"")) {
            Matcher m = CHINESE.matcher(segment);
            if (m.find()) {
                word.add(segment);
            }
        }
        return word;
    }

    /**
     * Scans one file and writes a record for every extracted text segment.
     * Files whose name does not end in {@code .java} are only echoed to standard output.
     *
     * @param tempFile the file to scan
     * @throws IOException if the file cannot be read or the configured charset is unsupported
     */
    public void readTxt(File tempFile) throws IOException {
        String name = tempFile.getName();
        System.out.println(name);
        if (!name.endsWith(JAVA_SUFFIX)) {
            return;
        }
        System.out.println("#" + name + "\n");

        String packageName = packageNameOf(tempFile);
        String fileName = name.substring(0, name.lastIndexOf('.'));

        // try-with-resources guarantees the reader is closed even when reading fails.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(tempFile), charset))) {
            int lineNumber = 1;
            int keyIndex = 1;
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (isTranslatable(line)) {
                    String key = packageName + "_" + fileName + "_"
                            + String.format(java.util.Locale.ROOT, "%04d", keyIndex);
                    for (String text : getWord(line)) {
                        output.write(name + "@@@" + lineNumber + "@@@" + line
                                + "@@@" + key + "@@@" + text + "\n");
                    }
                    // The key counter advances once per qualifying line, not per segment.
                    keyIndex++;
                }
                lineNumber++;
            }
        }
    }

    /**
     * Recursively scans a directory; does nothing when {@code folder} is not a directory.
     * Entries are visited in sorted name order so the output is deterministic.
     *
     * @param folder path of the directory to scan
     * @throws IOException if reading any contained file fails
     */
    public void readDir(String folder) throws IOException {
        File dir = new File(folder);
        if (!dir.isDirectory()) {
            return;
        }
        System.out.println("#Dir#" + dir.getName() + "\n");

        String[] children = dir.list();
        if (children == null) {
            // File.list() returns null on an I/O error (e.g. the directory is unreadable).
            return;
        }
        Arrays.sort(children);
        for (String child : children) {
            File entry = new File(dir, child);
            if (entry.isDirectory()) {
                readDir(entry.getPath());
            } else {
                readTxt(entry);
            }
        }
    }

    /** Returns true when the line contains Chinese text and none of the skip markers. */
    private static boolean isTranslatable(String line) {
        if (!CHINESE.matcher(line).find()) {
            return false;
        }
        for (String marker : SKIP_MARKERS) {
            if (line.contains(marker)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the path segment that follows the project marker, or the parent directory name
     * when the marker does not occur in the path.
     */
    private static String packageNameOf(File file) {
        String path = file.getPath();
        int marker = path.indexOf(PROJECT_MARKER);
        if (marker < 0) {
            File parent = file.getParentFile();
            return parent == null ? "" : parent.getName();
        }
        // Skip the marker itself plus the single separator character that follows it.
        int start = marker + PROJECT_MARKER.length() + 1;
        if (start >= path.length()) {
            return "";
        }
        String rest = path.substring(start);
        int sep = rest.indexOf(File.separatorChar);
        return sep < 0 ? rest : rest.substring(0, sep);
    }
}
public class Test { //输出文件路径 public static String outFile = "D:/ceshi.txt"; //输入文件夹路径 public static String inFolder = "D:\\work"; public static String charset = "UTF-8"; public static void main(String[] args) throws IOException { String tempString = "import java.io.FileWriter;"; tempString = "3试试"; Pattern p= Pattern.compile("[\u4e00-\u9fa5]"); Matcher m = p.matcher(tempString); if (m.find()){ System.out.println(tempString); } PrintWriter output = new PrintWriter(new FileWriter(new File(outFile))); Process process = new Process(output, charset); process.readDir(inFolder); output.close(); } }