// Batch-convert the encoding of files (批量修改文件的编码)

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;

import org.mozilla.intl.chardet.HtmlCharsetDetector;
import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
import org.mozilla.intl.chardet.nsPSMDetector;

/**
 * @author  springmvc2006@sina.com
 *
 */
public class ChangeFile2UTF8 {

 public static void main(String[] args) throws Exception {
  String newEncoding = "utf-8"; // 文件新的编码
  File oldFileDir = new File("D:/task/input");
  String newFileDir = "D:/task/output";
  ChangeTask(oldFileDir, newFileDir, newEncoding);
  System.out.println("转码为:"+newEncoding+"...成功");
 }
 
 /**
  * 递归函数
  * @param oldFileDir
  * @param newFileDir
  * @param newEncoding
  * @throws Exception
  */
 public static void ChangeTask(File oldFileDir, String newFileDir, String newEncoding)throws Exception{

  if (oldFileDir.isDirectory()) {
   File[] oldFile = oldFileDir.listFiles();
   for (int i = 0; i < oldFile.length; i++) {
    if(oldFile[i].isFile()){
     if(oldFile[i].getName().indexOf(".jar") != -1){
      System.out.println(oldFile[i].getName());
      continue;
     } //jar 文件不要转
     String   encodingOld = new FileCharsetDetector().guestFileEncoding(oldFile[i], 2);
     System.out.println(oldFile[i].getAbsolutePath() +"....."+encodingOld);
     //encodingOld = "unicode";
     if("windows".toLowerCase().indexOf(encodingOld) != -1){
      encodingOld = "unicode";
     }else if("Big".toLowerCase().indexOf(encodingOld) != -1){
      encodingOld = "gbk";
     }else if("nomatch".toLowerCase().indexOf(encodingOld) != -1){
      encodingOld = "gbk";
     }

     saveFile2OtherEncoding(new File(oldFileDir, oldFile[i]
                                       .getName()), newFileDir, encodingOld, newEncoding);
    }else{
     ChangeTask(oldFile[i], newFileDir+"/"+oldFile[i].getName(), newEncoding);
    }

   }

  }
 }

 /**
  * 转码函数
  * @param oldFile
  * @param newFilePathString
  */
 public static void saveFile2OtherEncoding(File oldFile, String newFileDir,
   String oldEncoding, String newEncoding)throws Exception {
  FileInputStream fileInputStream = null;
  InputStreamReader inputStreamRead = null;
  BufferedReader bufferRead = null;

  BufferedWriter newFileBW = null;
  OutputStreamWriter outputStreamWriter = null;
  FileOutputStream fileOutputStream = null;


  try {
   fileInputStream = new FileInputStream(oldFile);
   inputStreamRead = new InputStreamReader(fileInputStream, oldEncoding);
   bufferRead = new BufferedReader(inputStreamRead);
   createFileName(newFileDir);
   File copyFile = new File(newFileDir, oldFile.getName());
   fileOutputStream = new FileOutputStream(copyFile, false);
   outputStreamWriter = new OutputStreamWriter(fileOutputStream,
     newEncoding);
   newFileBW = new BufferedWriter(outputStreamWriter);

   String strTSVLine = "";
   while ((strTSVLine = bufferRead.readLine()) != null) {
    if (strTSVLine.equals("")) {
     continue;
    }
    newFileBW.write(strTSVLine+ "\r\n");
    //newFileBW.write(strTSVLine.replaceAll("=gbk", "=utf-8")+ "\r\n");
    //System.out.println(strTSVLine);
   }

  } finally {
   if (bufferRead != null)
    bufferRead.close();
   if (newFileBW != null) {
    newFileBW.flush();
    newFileBW.close();
   }
  }
  

 }

 /**
  * 创建文件夹
  * @param newFileDir
  */
 public static void createFileName(String newFileDir) {
  File newFile = new File(newFileDir);
  if (!newFile.exists()) {
   newFile.mkdirs();
  }
 }
}

class FileCharsetDetector {
    private boolean found = false;  
   
     /** 
      * 如果完全匹配某个字符集检测算法, 则该属性保存该字符集的名称. 否则(如二进制文件)其值就为默认值 null, 这时应当查询属性  
      */ 
     private String encoding = null;  
  
     public static void mains(String[] argv) throws Exception {  
            String   encoding = new FileCharsetDetector().guestFileEncoding("D:/task/input/GetAllStlrsDocAction.java");  
            String   encodingTwo = new FileCharsetDetector().guestFileEncoding(new File("D:/task/input/GetAllStlrsDocAction.java"), 2);  
           
            System.out.println(encoding);
            System.out.println(encodingTwo);
     }  
  
     /** 
      * 传入一个文件(File)对象,检查文件编码 
      *  
      * @param file 
      *            File对象实例 
      * @return 文件编码,若无,则返回null 
      * @throws FileNotFoundException 
      * @throws IOException 
      */ 
     public String guestFileEncoding(File file) throws FileNotFoundException,  
             IOException {  
         return geestFileEncoding(file, new nsDetector());  
     }  
  
     /** 
      * 获取文件的编码 
      *  
      * @param file 
      *            File对象实例 
      * @param languageHint 
      *            语言提示区域代码 eg:1 : Japanese; 2 : Chinese; 3 : Simplified Chinese; 
      *            4 : Traditional Chinese; 5 : Korean; 6 : Dont know (default) 
      * @return 文件编码,eg:UTF-8,GBK,GB2312形式,若无,则返回null 
      * @throws FileNotFoundException 
      * @throws IOException 
      */ 
     public String guestFileEncoding(File file, int languageHint)  
             throws FileNotFoundException, IOException {  
         return geestFileEncoding(file, new nsDetector(languageHint));  
     }  
  
     /** 
      * 获取文件的编码 
      *  
      * @param path 
      *            文件路径 
      * @return 文件编码,eg:UTF-8,GBK,GB2312形式,若无,则返回null 
      * @throws FileNotFoundException 
      * @throws IOException 
      */ 
     public String guestFileEncoding(String path) throws FileNotFoundException,  
             IOException {  
         return guestFileEncoding(new File(path));  
     }  
  
     /** 
      * 获取文件的编码 
      *  
      * @param path 
      *            文件路径 
      * @param languageHint 
      *            语言提示区域代码 eg:1 : Japanese; 2 : Chinese; 3 : Simplified Chinese; 
      *            4 : Traditional Chinese; 5 : Korean; 6 : Dont know (default) 
      * @return 
      * @throws FileNotFoundException 
      * @throws IOException 
      */ 
     public String guestFileEncoding(String path, int languageHint)  
             throws FileNotFoundException, IOException {  
         return guestFileEncoding(new File(path), languageHint);  
     }  
  
     /** 
      * 获取文件的编码 
      *  
      * @param file 
      * @param det 
      * @return 
      * @throws FileNotFoundException 
      * @throws IOException 
      */ 
     private String geestFileEncoding(File file, nsDetector det)  
             throws FileNotFoundException, IOException {  

         // Set an observer...  
         // The Notify() will be called when a matching charset is found.  
         det.Init(new nsICharsetDetectionObserver() {  
             public void Notify(String charset) {
                 found = true;  
                 encoding = charset;
             }  
         });  
        
        
  
         BufferedInputStream imp = new BufferedInputStream(new FileInputStream(  
                 file));  
  
         byte[] buf = new byte[1024];  
         int len;  
         boolean done = false;  
         boolean isAscii = true;  
  
         while ((len = imp.read(buf, 0, buf.length)) != -1) {  
             // Check if the stream is only ascii.  
             if (isAscii)  
                 isAscii = det.isAscii(buf, len);  
  
             // DoIt if non-ascii and not done yet.  
             if (!isAscii && !done)  
                 done = det.DoIt(buf, len, false);  
         }  
         det.DataEnd();  
  
         if (isAscii) {  
             encoding = "ASCII";  
             found = true;  
         }  
  
         if (!found) {  
             String prob[] = det.getProbableCharsets();  
             if (prob.length > 0) {  
                 // 在没有发现情况下,则取第一个可能的编码  
                 encoding = prob[0];  
             } else {  
                 return null;  
             }  
            
         }  
         return encoding;  
     }
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值