
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.Locale;

import org.mozilla.intl.chardet.HtmlCharsetDetector;
import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
import org.mozilla.intl.chardet.nsPSMDetector;

// Author: springmvc2006@sina.com
public class ChangeFile2UTF8 {

 public static void main(String[] args) throws Exception {
  String newEncoding = "utf-8"; // 文件新的编码
  File oldFileDir = new File("D:/task/input");
  String newFileDir = "D:/task/output";
  ChangeTask(oldFileDir, newFileDir, newEncoding);
  * 递归函数
  * @param oldFileDir
  * @param newFileDir
  * @param newEncoding
  * @throws Exception
 public static void ChangeTask(File oldFileDir, String newFileDir, String newEncoding)throws Exception{

  if (oldFileDir.isDirectory()) {
   File[] oldFile = oldFileDir.listFiles();
   for (int i = 0; i < oldFile.length; i++) {
     if(oldFile[i].getName().indexOf(".jar") != -1){
     } //jar 文件不要转
     String   encodingOld = new FileCharsetDetector().guestFileEncoding(oldFile[i], 2);
     System.out.println(oldFile[i].getAbsolutePath() +"....."+encodingOld);
     //encodingOld = "unicode";
     if("windows".toLowerCase().indexOf(encodingOld) != -1){
      encodingOld = "unicode";
     }else if("Big".toLowerCase().indexOf(encodingOld) != -1){
      encodingOld = "gbk";
     }else if("nomatch".toLowerCase().indexOf(encodingOld) != -1){
      encodingOld = "gbk";

     saveFile2OtherEncoding(new File(oldFileDir, oldFile[i]
                                       .getName()), newFileDir, encodingOld, newEncoding);
     ChangeTask(oldFile[i], newFileDir+"/"+oldFile[i].getName(), newEncoding);



  * 转码函数
  * @param oldFile
  * @param newFilePathString
 public static void saveFile2OtherEncoding(File oldFile, String newFileDir,
   String oldEncoding, String newEncoding)throws Exception {
  FileInputStream fileInputStream = null;
  InputStreamReader inputStreamRead = null;
  BufferedReader bufferRead = null;

  BufferedWriter newFileBW = null;
  OutputStreamWriter outputStreamWriter = null;
  FileOutputStream fileOutputStream = null;

  try {
   fileInputStream = new FileInputStream(oldFile);
   inputStreamRead = new InputStreamReader(fileInputStream, oldEncoding);
   bufferRead = new BufferedReader(inputStreamRead);
   File copyFile = new File(newFileDir, oldFile.getName());
   fileOutputStream = new FileOutputStream(copyFile, false);
   outputStreamWriter = new OutputStreamWriter(fileOutputStream,
   newFileBW = new BufferedWriter(outputStreamWriter);

   String strTSVLine = "";
   while ((strTSVLine = bufferRead.readLine()) != null) {
    if (strTSVLine.equals("")) {
    newFileBW.write(strTSVLine+ "\r\n");
    //newFileBW.write(strTSVLine.replaceAll("=gbk", "=utf-8")+ "\r\n");

  } finally {
   if (bufferRead != null)
   if (newFileBW != null) {


  * 创建文件夹
  * @param newFileDir
 public static void createFileName(String newFileDir) {
  File newFile = new File(newFileDir);
  if (!newFile.exists()) {

class FileCharsetDetector {
    private boolean found = false;  
      * 如果完全匹配某个字符集检测算法, 则该属性保存该字符集的名称. 否则(如二进制文件)其值就为默认值 null, 这时应当查询属性  
     private String encoding = null;  
     public static void mains(String[] argv) throws Exception {  
            String   encoding = new FileCharsetDetector().guestFileEncoding("D:/task/input/GetAllStlrsDocAction.java");  
            String   encodingTwo = new FileCharsetDetector().guestFileEncoding(new File("D:/task/input/GetAllStlrsDocAction.java"), 2);  
      * 传入一个文件(File)对象,检查文件编码 
      * @param file 
      *            File对象实例 
      * @return 文件编码,若无,则返回null 
      * @throws FileNotFoundException 
      * @throws IOException 
     public String guestFileEncoding(File file) throws FileNotFoundException,  
             IOException {  
         return geestFileEncoding(file, new nsDetector());  
      * 获取文件的编码 
      * @param file 
      *            File对象实例 
      * @param languageHint 
      *            语言提示区域代码 eg:1 : Japanese; 2 : Chinese; 3 : Simplified Chinese; 
      *            4 : Traditional Chinese; 5 : Korean; 6 : Dont know (default) 
      * @return 文件编码,eg:UTF-8,GBK,GB2312形式,若无,则返回null 
      * @throws FileNotFoundException 
      * @throws IOException 
     public String guestFileEncoding(File file, int languageHint)  
             throws FileNotFoundException, IOException {  
         return geestFileEncoding(file, new nsDetector(languageHint));  
      * 获取文件的编码 
      * @param path 
      *            文件路径 
      * @return 文件编码,eg:UTF-8,GBK,GB2312形式,若无,则返回null 
      * @throws FileNotFoundException 
      * @throws IOException 
     public String guestFileEncoding(String path) throws FileNotFoundException,  
             IOException {  
         return guestFileEncoding(new File(path));  
      * 获取文件的编码 
      * @param path 
      *            文件路径 
      * @param languageHint 
      *            语言提示区域代码 eg:1 : Japanese; 2 : Chinese; 3 : Simplified Chinese; 
      *            4 : Traditional Chinese; 5 : Korean; 6 : Dont know (default) 
      * @return 
      * @throws FileNotFoundException 
      * @throws IOException 
     public String guestFileEncoding(String path, int languageHint)  
             throws FileNotFoundException, IOException {  
         return guestFileEncoding(new File(path), languageHint);  
      * 获取文件的编码 
      * @param file 
      * @param det 
      * @return 
      * @throws FileNotFoundException 
      * @throws IOException 
     private String geestFileEncoding(File file, nsDetector det)  
             throws FileNotFoundException, IOException {  

         // Set an observer...  
         // The Notify() will be called when a matching charset is found.  
         det.Init(new nsICharsetDetectionObserver() {  
             public void Notify(String charset) {
                 found = true;  
                 encoding = charset;
         BufferedInputStream imp = new BufferedInputStream(new FileInputStream(  
         byte[] buf = new byte[1024];  
         int len;  
         boolean done = false;  
         boolean isAscii = true;  
         while ((len = imp.read(buf, 0, buf.length)) != -1) {  
             // Check if the stream is only ascii.  
             if (isAscii)  
                 isAscii = det.isAscii(buf, len);  
             // DoIt if non-ascii and not done yet.  
             if (!isAscii && !done)  
                 done = det.DoIt(buf, len, false);  
         if (isAscii) {  
             encoding = "ASCII";  
             found = true;  
         if (!found) {  
             String prob[] = det.getProbableCharsets();  
             if (prob.length > 0) {  
                 // 在没有发现情况下,则取第一个可能的编码  
                 encoding = prob[0];  
             } else {  
                 return null;  
         return encoding;  


你可以使用以下 Python 代码来批量修改文件编码:

```python
import os
from chardet import detect


def convert_encoding(path):
    with open(path, 'rb') as fp:
        content = fp.read()
    encoding = detect(content)['encoding']
    content = content.decode(encoding).encode('utf8')
    with open(path, 'wb') as fp:
        fp.write(content)


def batch_convert_encoding(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(('.c', '.cpp', '.h', '.hpp')):
                path = os.path.join(root, file)
                convert_encoding(path)


# 使用示例
batch_convert_encoding("目标文件夹目录")
```

这段代码会递归地遍历指定目录下的所有文件,并将后缀名为 .c、.cpp、.h、.hpp 的文件转换为 UTF-8 编码。首先,它通过 chardet 库检测文件的原始编码;然后,将文件内容按照检测到的编码解码,并重新以 UTF-8 编码写回文件。这样就完成了批量修改文件编码的操作。[1][2][3]

#### 引用

- [1] [python 批量修改文件编码](https://blog.csdn.net/Eternal_Whispers/article/details/120220132)
- [2][3] [python实现批量转换文件编码encoding](https://blog.csdn.net/awisc/article/details/120901910)




当前余额3.43前往充值 >
领取后你会自动成为博主和红包主的粉丝 规则
钱包余额 0


