java国际化--后端读出jsp页面所有中文,然后把中文自动转换成对应code+标签,写入到jsp页面

本文链接：https://blog.csdn.net/a229397620/article/details/103635682
本博文借鉴了其他博主的代码,原博文地址找不到了,不是有意冒犯
根据个人需要进行了一些改良,并增加了替换进jsp的功能,
需要导入下列三个jar包,
chardet.jar
cpdetector_1.0.10.jar
antlr-2.7.6.jar
read方法读出jsp页面中的所有中文,用于配置messages_en_US.properties和messages_zh_CN.properties这两个国际化配置文件。
insert方法自动将jsp页面中的中文替换成<spring:message code="code"/>格式的标签,并写回jsp页面。代码如下:
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import info.monitorenter.cpdetector.io.ASCIIDetector;
import info.monitorenter.cpdetector.io.CodepageDetectorProxy;
import info.monitorenter.cpdetector.io.JChardetFacade;
import info.monitorenter.cpdetector.io.ParsingDetector;
import info.monitorenter.cpdetector.io.UnicodeDetector;

/**
 * 国际化工具
 * 类描述:    
 * 创建时间:  2019年12月19日 下午6:19:10 
 *    
 */
public class FindFileController {
    
	static int fileCount = 0;//文件总数  
    static int wrong = 0 ;//含有中文字符的文件数     
    static FileOutputStream fos = null;  
    static OutputStreamWriter osw = null;  
	
	public static void main(String[] args){  
        try {  
            //每次重新执行的时候删除上次写入的文件  
            File file = new File("D:\\tomcat\\FileCH.txt");  
            file.delete();  
            //jsp目录
            String filePath = "D:\\tomcat\\views";
            //读出中文存放的目录,用于中英转换配置文件
            fos = new FileOutputStream(new File("D:\\tomcat\\FileCH.txt"),true);  
            //读出所有中文
            //read(filePath);
            //中文转换成标签+code 
            insert(filePath);
            
        } catch (IOException e) {  
            e.printStackTrace();  
        }finally{  
            try {  
                fos.close();  
            } catch (IOException e) {  
                e.printStackTrace();  
            }  
        }  
    } 
    
    /**
     * 
     * @describe 将所有中文读取出来,并写入到一个文件,用于读出所有中文,进行code配置
     * @param strPath
     * @throws IOException
     */
    private static void read(String strPath) throws IOException {
        File dir = new File(strPath);  
        File[] files = dir.listFiles();  

        if (files == null)  
            return;  
        for (int i = 0; i < files.length; i++) {
            System.out.println("第"+i+"个文件");
            int flag = 0 ;  
            if (files[i].isDirectory()) {  
            	read(files[i].getAbsolutePath());  
            } else {  
                fileCount++;  
                String strFileName = files[i].getAbsolutePath().toLowerCase();  
                //System.out.println(getFileEncode(files[i].getAbsolutePath())+" ----" +files[i].getName());  
                //截取文件格式  
                String  fileName = strFileName.substring(strFileName.lastIndexOf(".")+1,strFileName.length());  
                //排除不需要扫描的文件
                if(fileName.equals("rar") || fileName.equals("jpg") 
                		|| fileName.equals("png") || fileName.equals("jar") 
                		|| fileName.equals("doc") || fileName.equals("xls") 
                		|| fileName.equals("gif") || fileName.equals("wmz")){
                    continue;
                }
                //不知为何  两种方法判断的时候都会吧class文件和jar文件当做是含有中文字符的文件  
                //所以此处排除掉这class文件和jar文件不参与判断  
                if(!"class".equals(fileName.toLowerCase())){  
                    //开始输入文件流，检查文件  
                    String enCode = getFileEncode(files[i].getAbsolutePath());  
                    if("void".equals(enCode)){  
                        enCode="UTF-8";  
                    }if("windows-1252".equals(enCode)){  
                        enCode="GBK";  
                    }  
                    FileInputStream fis = new FileInputStream(files[i].getAbsolutePath());  
                    InputStreamReader in = new InputStreamReader(fis,enCode);  
                    BufferedReader br = new BufferedReader(in);  
                    //用于记录行数  确定文件哪一行有中文  
                    int lineCount = 0 ;  
                    String line = null;  
                    //逐行检查文件  
                    while((line = br.readLine())!=null){  
                        /使用正则表达式进行判断  
                        lineCount++ ;  
                        char[] charArray = line.toCharArray();    
                        for (int k = 0; k < charArray.length; k ++) {    
                            if ((charArray[k] >= 0x4e00) && (charArray[k] <= 0x9fbb)) {  
                                //每行进行扫描，用正则判断含有中文的行
                            	//双节字符正则[^\\x00-\\xff]
                                Pattern p = Pattern.compile("([^\\x00-\\xff]+)");   
                                String mv = "";  
                                //正则判断
                                Matcher m = p.matcher( line ); 
                                //遍历含有中文的行。并取出中文
                                while (m.find()) {    
                                    mv += m.group(0);    
                                } 
                                //将含有中文的文件名称和中文所在行数写入文件夹  
                                osw.write(files[i].getAbsolutePath()+" ------- 第"+lineCount+"行<===>"+mv+"\r\n");
                                osw.flush();  
                                flag ++ ;  
                                //wrong++;  
                                if(flag!=0) k =charArray.length ;  
                            }    

                        }   
                    }  
                    //flag!=0 说明该文件中含有中文  
                    if(flag!=0) wrong++ ;  
                    br.close();  
                    in.close();  
                    fis.close();  
                }  
            }  
        }  
    } 
    /**
     * //检查文件类型  
     * @describe
     * @param path
     * @return
     */
    public static String getFileEncode(String path) {
        /* 
         * detector是探测器，它把探测任务交给具体的探测实现类的实例完成。 
         * cpDetector内置了一些常用的探测实现类，这些探测实现类的实例可以通过add方法 加进来，如ParsingDetector、 
         * JChardetFacade、ASCIIDetector、UnicodeDetector。 
         * detector按照“谁最先返回非空的探测结果，就以该结果为准”的原则返回探测到的 
         * .字符集编码。使用需要用到三个第三方JAR包：antlr.jar、chardet.jar和cpdetector.jar 
         * cpDetector是基于统计学原理的，不保证完全正确。 
         */  
        CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();  
        /* 
         * ParsingDetector可用于检查HTML、XML等文件或字符流的编码,构造方法中的参数用于 
         * .指示是否显示探测过程的详细信息，为false不显示。 
         */  
        detector.add(new ParsingDetector(false));  
        /* 
         * JChardetFacade封装了由Mozilla组织提供的JChardet，它可以完成大多数文件的编码 
         * .测定。所以，一般有了这个探测器就可满足大多数项目的要求，如果你还不放心，可以 
         * .再多加几个探测器，比如下面的ASCIIDetector、UnicodeDetector等。 
         */  
        detector.add(JChardetFacade.getInstance());// 用到antlr.jar、chardet.jar  
        // ASCIIDetector用于ASCII编码测定  
        detector.add(ASCIIDetector.getInstance());  
        // UnicodeDetector用于Unicode家族编码的测定  
        detector.add(UnicodeDetector.getInstance());  
        java.nio.charset.Charset charset = null;  
        File f = new File(path);  
        try {  
            charset = detector.detectCodepage(f.toURI().toURL());  
        } catch (Exception ex) {  
            ex.printStackTrace();  
        }  
        if (charset != null)  
            return charset.name();  
        else  
            return null;  
    }
    
    /**
     * 
  	 * @describe 将所有中文转换成对应的标签+code
  	 * @param strPath
  	 * @throws IOException
  	 */
    public static void insert(String strPath)throws IOException{
    	File dir = new File(strPath);  
        File[] files = dir.listFiles(); 
    	
        //读出国际化中文配置键值对
    	Properties pro=new Properties();
    	//中文配置文件存放路径
    	FileInputStream fis = new FileInputStream(
    			"D:\\MyEclipse-Workspace\\GameBackV1\\src\\main\\resources\\messages_zh_CN.properties");
    	pro.load(fis);//将读取的文件存放到pro对象里(键值对)
    	Set<String> set=pro.stringPropertyNames();//获取所有key集合
        
        if (files == null)  
            return;  
        for (int i = 0; i < files.length; i++) {
            System.out.println("第"+i+"个文件");
            int flag = 0 ;  
            if (files[i].isDirectory()) {
            	//文件夹,则进入文件夹下继续循环
            	insert(files[i].getAbsolutePath());  
            } else {  
                fileCount++;  
                String strFileName = files[i].getAbsolutePath().toLowerCase();  
                //System.out.println(getFileEncode(files[i].getAbsolutePath())+" ----" +files[i].getName());  
                //截取文件格式  
                String  fileName = strFileName.substring(strFileName.lastIndexOf(".")+1,strFileName.length());  
                //排除不需要扫描的文件,只要扫描jsp和java文件
                if(fileName.equals("rar") || fileName.equals("jpg") 
                		|| fileName.equals("png") || fileName.equals("jar") 
                		|| fileName.equals("doc") || fileName.equals("xls") 
                		|| fileName.equals("gif") || fileName.equals("wmz")){
                    continue;
                }
                //不知为何  两种方法判断的时候都会吧class文件和jar文件当做是含有中文字符的文件  
                //所以此处排除掉这class文件和jar文件不参与判断  
                if(!"class".equals(fileName.toLowerCase())){  
                    //开始输入文件流，检查文件  
                    String enCode = getFileEncode(files[i].getAbsolutePath());  
                    if("void".equals(enCode)){  
                        enCode="UTF-8";  
                    }if("windows-1252".equals(enCode)){  
                        enCode="GBK";  
                    }
                    String line = null;
                    RandomAccessFile raf = new RandomAccessFile(files[i].getAbsolutePath(), "rw");
                    StringBuffer stringBuffer=new StringBuffer();
                    while ((line = raf.readLine()) != null) {
                    	//中文乱码转换
                    	line = new String(line.getBytes("ISO-8859-1"), "utf-8");
                    	stringBuffer.append(line+"\r\n");
                    }
                    //将源文件内容清空,用于避免修改源文件时会覆盖的问题
                    raf.setLength(0);
                    
                    //创建新文件,并向文件写入源文件内容
                    String newFileName = "D:\\tomcat\\newfile.jsp";
                    File newfile=new File(newFileName);//待写入文件
                    FileOutputStream in = new FileOutputStream(newfile);
                    OutputStreamWriter out = new OutputStreamWriter(in,"utf-8");
                    BufferedWriter bw=new BufferedWriter(out);
                    //将读取到的文件内容临时存放到新文件,避免修改源文件时会覆盖的问题
                    bw.write(stringBuffer.toString());
                    bw.close();
                    out.close();
                    in.close();
                    //读取新建的源文件
                    RandomAccessFile raf1 = new RandomAccessFile(newFileName, "rw");
                    
                    long lastPoint = 0;//记住上一次的偏移量
                    while ((line = raf1.readLine()) != null) {
                        line = new String(line.getBytes("ISO-8859-1"), "utf-8");
                        char[] charArray = line.toCharArray(); 
                        boolean exist = true;
                    	for (int k = 0; k < charArray.length; k ++) {    
                            if ((charArray[k] >= 0x4e00) && (charArray[k] <= 0x9fbb)) {
                            	//存在中文才会进入
                                //每行进行扫描，用正则判断含有中文的行
                            	//中文[\u4e00-\u9fa5]
                            	//匹配双字节字符(包括汉字在内)：[^\x00-\xff]
                                Pattern p = Pattern.compile("([\u4e00-\u9fa5]+)"); 
                                String mv = "";  
                                //正则判断
                                Matcher m = p.matcher( line ); 
                                //遍历含有中文的行。并取出中文
                                while (m.find()) {    
                                    mv += m.group(0);    
                                }
                                for(String key:set){//遍历
                            		String value = pro.getProperty(key);
                            		if(value.trim().equals(mv)) {
                                		String newStr = "<spring:message code=\""+key.trim()+"\"/>";
                                		line = line.replace(mv, newStr);
                                        raf.seek(lastPoint);
                    	                raf.write((line+"\r\n").getBytes());
                    	                exist = false;
                    	                break;
                                	}
                            	}
                                if(exist) {
                                	raf.seek(lastPoint);
                                	//writeBytes()会出现中文乱码,write可以避免
                	                raf.write((line+"\r\n").getBytes());
                	                exist = false;
                                }
                                flag ++ ;  
                                //wrong++;  
                                if(flag!=0) k =charArray.length ;  
                            }
                        }
                        if(exist) {
                        	raf.seek(lastPoint);
                        	raf.write((line+"\r\n").getBytes());
                        }
                        lastPoint = raf.getFilePointer();
                    }
                    //flag!=0 说明该文件中含有中文  
                    if(flag!=0) wrong++ ;
                    raf.close();
                    raf1.close();
                    //删除新建的临时文件
                    newfile.delete();
                }  
            }  
        } 
	}
    
}