GBK(GB2312)与UTF-8文件转码

最新推荐文章于 2024-08-05 15:29:58 发布

burro630

最新推荐文章于 2024-08-05 15:29:58 发布

阅读量2.2k

点赞数 2

分类专栏：安卓学习笔记文章标签： UTF8 GBK 字符集编码转换 idea

本文链接：https://blog.csdn.net/arson663300/article/details/51842864

版权

安卓学习笔记专栏收录该内容

6 篇文章 0 订阅

订阅专栏

最近使用 IntelliJ IDEA 开发工具时，转码有点小问题。百度了一下，Eclipse可以自动转码，而IDEA却不可以，总是需要手动去转。若要把源文件由GBK转成UTF-8，得靠其他方式了。网上搜罗了一下方法，然后自己整理了一下。现把代码贴出来，测试OK，可以直接使用！

需要四个jar包，分别是：“antlr-2.7.6.jar”、“commons-io-2.4.jar”、“cpdetector_1.0.5.jar”、“jchardet-1.0.jar”，网上可以直接下载。根据下面的方法，可以把任意的编码换成你想要的。转成新编码以后，用新文件把源文件覆盖掉就可以了。

package com.company;

import cpdetector.io.ASCIIDetector;
import cpdetector.io.CodepageDetectorProxy;
import cpdetector.io.JChardetFacade;
import cpdetector.io.UnicodeDetector;
import org.apache.commons.io.FileUtils;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Collection;

public class Main {
    privatestatic int fileCount = 0;
    privatestatic int convertedCount = 0;

    publicstatic void main(String[] args) {
        //源路径
        String primaryPath = "E:\\ WorkSpace\\Test\\src\\com";
        //目标路径
        String targetPath = "E:/src";
        //获取所有java文件
        utf8AndGbkConvert(primaryPath,targetPath, false);

    }

    /**
     * UTF8和GBK(GB2312)互相转换
     *
     * @param primaryPath 源文件路径
     * @param targetPath 目标文件路径
     * @param isUtf8ToGbk 是否是UTF-9转成GBK true表示UTF-8转成GBK（GB2312） false表示GBK(GB2312)转成UTF-8
     */
    public static void utf8AndGbkConvert(StringprimaryPath, String targetPath, boolean isUtf8ToGbk) {
        Collection<File> javaPrimaryFiles = FileUtils.listFiles(new File(primaryPath), new String[]{"java"}, true);
        fileCount= javaPrimaryFiles.size();
        for (File javaPrimaryFile :javaPrimaryFiles) {
            try {
                String chatsetName = judgeChatsetByFile(javaPrimaryFile);
                String newFilePath =targetPath + javaPrimaryFile.getAbsolutePath().substring(primaryPath.length());
                newFilePath =newFilePath.replaceAll("\\\\", "/");

                if (isUtf8ToGbk) {
                    if (chatsetName.equalsIgnoreCase("UTF-8")) {
                        //如果是源文件为UTF-8则转换成GBK，
                        FileUtils.writeLines(new File(newFilePath), "GBK", FileUtils.readLines(javaPrimaryFile,"UTF-8"));
                    } else {
                        //不是则直接复制
                        File f = new File(newFilePath.substring(0, newFilePath.lastIndexOf("/")));
                        if (!f.exists()) f.mkdirs();
                        File newFile = new File(newFilePath);
                        if (!newFile.exists())newFile.createNewFile();
                        FileUtils.copyFile(javaPrimaryFile,newFile);
                    }
                } else {
                    if (chatsetName.equalsIgnoreCase("GBK") || chatsetName.equalsIgnoreCase("GB2312")) {
                        //如果是源文件为GBK或者GB2312则转换成UTF-8，

                        FileUtils.writeLines(new File(newFilePath), "UTF-8", FileUtils.readLines(javaPrimaryFile,chatsetName));
                    } else {
                        File f = new File(newFilePath.substring(0, newFilePath.lastIndexOf("/")));
                        if (!f.exists()) f.mkdirs();
                        File newFile = new File(newFilePath);
                        if (!newFile.exists())newFile.createNewFile();
                        FileUtils.copyFile(javaPrimaryFile,newFile);
                    }
                }

            } catch(IOException e) {
               e.printStackTrace();
            }
        }
        System.out.println("fileCount:" + fileCount);
        System.out.println("convertedCount:" + convertedCount);

    }

    publicstatic String judgeChatsetByFile(File f) {

        CodepageDetectorProxy detector =CodepageDetectorProxy.getInstance();

        //detector.add(new ParsingDetector(false)); //如果不希望判断xml的encoding，而是要判断该xml文件的编码，则可以注释掉

        detector.add(JChardetFacade.getInstance());

        //ASCIIDetector用于ASCII编码测定

        detector.add(ASCIIDetector.getInstance());

        //UnicodeDetector用于Unicode家族编码的测定

        detector.add(UnicodeDetector.getInstance());

        Charset charset = null;
        try {
            charset =detector.detectCodepage(f.toURL());
        } catch(Exception ex) {
            ex.printStackTrace();
        }
        String codeName = "";
        if (charset != null) {
            System.out.println(f.getName() + "编码是：" + charset.name());
            convertedCount++;
            codeName = charset.name();
        } else {
            System.out.println(f.getName() + "未知");
            codeName = "UNKOWN";
        }
        returncodeName;
    }
}