批量转换 gbk 文件到utf8 文件,不引用三方类库【 java 代码】

由于需要批量转换源代码从gbk编码到utf8,所以写了这个工具类。

mac下面jdk11 环境执行通过

使用方法:将 main 方法中的 String path = "......"; 替换成需要转换的文件或目录路径,运行程序即可完成批量转换。


import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.concurrent.*;
import java.util.function.Consumer;

/**
 * Batch tool that rewrites source files as UTF-8.
 *
 * <p>Walks a file or directory tree, detects the encoding of every matching file
 * via {@code FileEncodeDetect} and, when the detected charset is not UTF-8,
 * rewrites the file in place as UTF-8. Tested on JDK 11.
 */
public class TransferEncodeToUtf8 {

    private static final System.Logger log = System.getLogger(TransferEncodeToUtf8.class.getName());

    /** Target charset; also used to recognise files that need no conversion. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /**
     * Entry point: converts every {@code .java} file below the configured path.
     *
     * @param args unused
     */
    public static void main(String[] args){
        // File or directory to convert; directories are traversed recursively.
        String path = "......./程序源代码";
        try {
            scanDir(new File(path),".java",".", file -> {
                try {
                    transferToUtf8(file);

                } catch (IOException e) {
                    log.log(System.Logger.Level.ERROR,"error",e);
                }
            });
        } finally {
            // Always release the pool, even if scanning fails, so its non-daemon
            // worker threads cannot keep the JVM alive. Queued tasks still run.
            threadPoolExecutor.shutdown();
        }
    }

    // Thread-pool parameters; adjust as needed.
    static int corePoolSize = 30;
    static int maximumPoolSize = 100;
    static long keepAliveTime = 30;
    static TimeUnit unit = TimeUnit.SECONDS;
    static BlockingQueue<Runnable> workQueue = new ArrayBlockingQueue<>(800);
    /**
     * Pool that runs the per-file jobs. With {@code CallerRunsPolicy}, a task that
     * cannot be queued is executed by the submitting thread instead of being rejected.
     */
    public static ThreadPoolExecutor threadPoolExecutor =
            new ThreadPoolExecutor(corePoolSize, maximumPoolSize,
                    keepAliveTime, unit, workQueue, new ThreadPoolExecutor.CallerRunsPolicy());

    /**
     * Submits {@code function} for a single file if its name matches.
     *
     * @param file       the file to consider
     * @param postFix    only files whose name ends with this suffix are processed
     * @param grepPreFix files whose name starts with this prefix are skipped
     * @param function   action executed on the thread pool for a matching file
     */
    public static void fileJob(File file, String postFix, String grepPreFix, Consumer<File> function){
        String name = file.getName();
        if(name.startsWith(grepPreFix)){
            // Ignored prefix (e.g. hidden files).
            return;
        }
        if(name.endsWith(postFix)){
            threadPoolExecutor.execute(() -> function.accept(file));
        }
    }

    /**
     * Recursively visits {@code file} and submits a job for every matching regular file.
     *
     * @param file       a file or a directory
     * @param postFix    suffix of the files to process
     * @param grepPreFix prefix of the file names to ignore
     * @param function   action applied to each matching file
     */
    public static void scanDir(File file, String postFix,String grepPreFix, Consumer<File> function){
        if(file.isFile()){
            fileJob(file, postFix,grepPreFix, function);
            return;
        }
        File[] children = file.listFiles();
        if(children == null){
            // listFiles() returns null when the path is not a directory or cannot be read.
            log.log(System.Logger.Level.WARNING,"cannot list directory, skipped: "+file);
            return;
        }
        for (File child : children){
            // The recursive call handles both regular files and sub-directories.
            scanDir(child,postFix,grepPreFix, function);
        }
    }

    /**
     * Rewrites {@code file} in place as UTF-8 when its detected charset is not UTF-8.
     *
     * <p>Files whose charset cannot be detected are logged and left untouched.
     * Failures while reading or writing the file are logged, not rethrown.
     *
     * @param file the file to convert
     * @throws IOException if charset detection cannot read the file
     */
    public static void transferToUtf8(File file) throws IOException {

        Charset charset = FileEncodeDetect.charset(file);
        if(charset == null){
            log.log(System.Logger.Level.ERROR,"charset is null, file:"+file);
            return;
        }

        // Compare by value: Charset instances are not guaranteed to be singletons.
        if(UTF_8.equals(charset)){
            log.log(System.Logger.Level.INFO,"[utf8 file] "+ file.toURI()+" is utf8, do nothing");
            return;
        }

        try{
            String content = Files.readString(file.toPath(),charset);
            Files.writeString(file.toPath(),content, UTF_8);
            log.log(System.Logger.Level.INFO,"[gbk to utf8] "+ file.toURI()+",transfer encode from gbk to utf8");

        }catch (Exception e){
            log.log(System.Logger.Level.ERROR,"[gbk to utf8 error] charset:"+ charset + ","+ file.toURI() +",error",e);
        }
    }

}

文件编码探测代码:


import java.io.*;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;

/**
 * Detects the character encoding of a file.
 *
 * <p>A byte order mark at the start of the file wins; otherwise the whole content
 * is trial-decoded with a fixed list of candidate charsets and the first one that
 * decodes without error is reported. The file is read fully into memory once per
 * detection, so this is intended for ordinary text/source files.
 */
public class FileEncodeDetect {

    /**
     * Detects the charset of {@code file}, falling back to a default.
     *
     * @param file           the file to inspect
     * @param defaultCharset charset name returned when detection fails
     * @return the detected charset, or {@code Charset.forName(defaultCharset)}
     * @throws IOException if the file cannot be read
     */
    public static Charset charset(File file,String defaultCharset) throws IOException {
        Charset detected = charset(file);
        return detected != null ? detected : Charset.forName(defaultCharset);
    }

    /**
     * Detects the charset of {@code file}.
     *
     * @param file the file to inspect
     * @return the detected charset, or {@code null} if no candidate decodes the content
     * @throws IOException if the file cannot be read
     */
    public static Charset charset(File file) throws IOException {
        byte[] content = readContent(file);
        Charset fromBom = encodeByBom(content);
        if(fromBom != null){
            return fromBom;
        }
        // Order matters: UTF-8 is strict, so it is tried before the more permissive GBK.
        // Further candidates (e.g. "GB18030", "GB2312") could be appended here.
        return charsetDetect(content,"UTF-8","GBK");
    }

    /** Reads the complete file content, always closing the stream. */
    private static byte[] readContent(File file) throws IOException {
        try (InputStream input = new FileInputStream(file)) {
            return input.readAllBytes();
        }
    }

    /** Returns the first candidate charset that decodes {@code content} cleanly, or {@code null}. */
    private static Charset charsetDetect(byte[] content,String... charsets){
        if(charsets.length == 0){
            throw new IllegalArgumentException("charsets must set");
        }
        for(String name: charsets){
            Charset candidate = Charset.forName(name);
            if(identified(content, content.length, candidate.newDecoder())){
                return candidate;
            }
        }
        return null;
    }

    /**
     * Tells whether the first {@code length} bytes of {@code buffer} decode without error.
     * The content is decoded in one piece so that a multi-byte character is never
     * split across two decode calls.
     */
    private static boolean identified(byte[] buffer, int length, CharsetDecoder decoder) {
        try {
            decoder.decode(ByteBuffer.wrap(buffer,0, length));
            return true;
        } catch (CharacterCodingException e) {
            return false;
        }
    }

    /** Maps a leading byte order mark to its charset, or returns {@code null} if there is none. */
    private static Charset encodeByBom(byte[] content) {
        // The UTF-8 BOM is three bytes (EF BB BF); checking only EF BB would also
        // match GBK text, where EF BB is a valid double-byte character.
        if(content.length >= 3
                && (content[0] & 0xFF) == 0xEF
                && (content[1] & 0xFF) == 0xBB
                && (content[2] & 0xFF) == 0xBF){
            return Charset.forName("UTF-8");
        }
        if(content.length >= 2){
            int mark = ((content[0] & 0xFF) << 8) | (content[1] & 0xFF);
            switch (mark){
                case 0xfffe:
                    // FF FE: little-endian UTF-16 mark. "Unicode" is kept from the original code;
                    // presumably a JDK alias of UTF-16 (which honours the mark) — TODO confirm.
                    return Charset.forName("Unicode");
                case 0xfeff:
                    // FE FF: big-endian UTF-16 mark.
                    return Charset.forName("UTF-16BE");
                default:
                    break;
            }
        }
        return null;
    }
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
批量将xlsx文件从gbk转换为utf8,可以使用Python的第三方库openpyxl。首先需要安装openpyxl库。可以使用pip命令安装:pip install openpyxl。安装完成后可以开始编写Python脚本。

首先需要通过os模块获取指定目录下所有的xlsx文件名,代码如下:

```python
import os

path = "your_folder_path"  # 设置目录路径
files = os.listdir(path)  # 获取目录下所有文件名
xlsx_files = [file for file in files if file.endswith(".xlsx")]  # 找出xlsx文件
```

上述代码将找出所在目录下以.xlsx为结尾的所有文件名,并放入列表xlsx_files中。

接下来需要遍历所有xlsx文件,逐一进行转换。在循环中,使用openpyxl库中的load_workbook方法打开xlsx文件,将工作表的编码格式从gbk转换为utf8,并保存成新文件,代码如下:

```python
import openpyxl

for file in xlsx_files:
    workbook = openpyxl.load_workbook(os.path.join(path, file))  # 打开文件
    for sheet in workbook.sheetnames:
        worksheet = workbook[sheet]
        for row in range(1, worksheet.max_row + 1):
            for column in range(1, worksheet.max_column + 1):
                cell = worksheet.cell(row, column)
                if cell.value is not None and isinstance(cell.value, str):
                    cell.value = cell.value.encode("gbk").decode("utf-8")  # 编码转换
    new_file = file[:-5] + "_utf8.xlsx"  # 新文件名
    workbook.save(os.path.join(path, new_file))  # 保存新文件
```

上述代码中,首先使用load_workbook方法打开xlsx文件,然后遍历所有工作表,再逐一遍历单元格,将文本类型的单元格内容从gbk编码转换为utf8编码,并保存为新文件。

执行上述代码后,将目录下所有xlsx文件的编码格式都转换为utf8格式,并保存为新文件,新文件名为原文件名加上"_utf8"后缀。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值