由于需要把源代码从 GBK 编码批量转换为 UTF-8,所以写了这个工具类。
已在 macOS 的 JDK 11 环境下运行通过。
使用方法:将 main 方法中 String path = "..." 的值替换成要转换的文件或目录路径,然后运行程序即可完成批量转换。
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.concurrent.*;
import java.util.function.Consumer;
/**
* gbk 转 utf8
* jdk 11 下测试通过
*/
public class TransferEncodeToUtf8 {
private static System.Logger log = System.getLogger(TransferEncodeToUtf8.class.getName());
public static void main(String[] args){
// 需要修改的文件或目录,会遍历目录,自动将gbk 转为 utf8
String path = "......./程序源代码";
scanDir(new File(path),".java",".", file -> {
try {
transferToUtf8(file);
} catch (IOException e) {
log.log(System.Logger.Level.ERROR,"error",e);
}
});
threadPoolExecutor.shutdown();
}
// 参数可以自己调整
static int corePoolSize = 30;
static int maximumPoolSize = 100;
static long keepAliveTime = 30;
static TimeUnit unit = TimeUnit.SECONDS;
static BlockingQueue<Runnable> workQueue = new ArrayBlockingQueue<>(800);
public static ThreadPoolExecutor threadPoolExecutor =
new ThreadPoolExecutor(corePoolSize, maximumPoolSize,
keepAliveTime, unit, workQueue, new ThreadPoolExecutor.CallerRunsPolicy());
public static void fileJob(File file, String postFix, String grepPreFix, Consumer<File> function){
if(file.getName().startsWith(grepPreFix)){
// ignore it.
return;
}
if(file.getName().endsWith(postFix)){
threadPoolExecutor.execute(() -> function.accept(file));
}
}
/**
*
* @param file 文件、文件夹
* @param postFix 处理的指定后缀
* @param grepPreFix 忽略的前缀
* @param function
*/
public static void scanDir(File file, String postFix,String grepPreFix, Consumer<File> function){
if(file.isFile()){
fileJob(file, postFix,grepPreFix, function);
}else {
//dir
File[] list = file.listFiles();
for (File f : list){
if(file.isFile()){
fileJob(file, postFix,grepPreFix,function);
}
else {
scanDir(f,postFix,grepPreFix, function);
}
}
}
}
public static void transferToUtf8(File file) throws IOException {
Charset charset = FileEncodeDetect.charset(file);
if(charset == null){
log.log(System.Logger.Level.ERROR,"charset is null, file:"+file);
return;
}
if(charset != Charset.forName("UTF-8")){
try{
String content = Files.readString(Paths.get(file.toURI()),charset);
Files.writeString(Paths.get(file.toURI()),content, Charset.forName("utf8") );
log.log(System.Logger.Level.INFO,"[gbk to utf8] "+ file.toURI()+",transfer encode from gbk to utf8");
}catch (Exception e){
log.log(System.Logger.Level.ERROR,"[gbk to utf8 error] charset:"+ charset + ","+ file.toURI() +",error",e);
}
return;
}
log.log(System.Logger.Level.INFO,"[utf8 file] "+ file.toURI()+" is utf8, do nothing");
}
}
文件编码探测代码:
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
/**
 * Best-effort detection of a file's text encoding.
 *
 * <p>A byte-order mark is honoured first; otherwise the file is accepted as the
 * first candidate charset (UTF-8, then GBK) under which <em>all</em> of its
 * bytes decode without error.
 */
public class FileEncodeDetect {
    /** Size of the scratch buffer used while streaming a file through a decoder. */
    private static final int BUFFER_SIZE = 8 * 2048;

    /**
     * Detects the charset of {@code file}, falling back to a default.
     *
     * @param file           file to inspect
     * @param defaultCharset charset name returned when detection is inconclusive
     * @return the detected charset, or {@code Charset.forName(defaultCharset)}
     * @throws IOException if the file cannot be read
     */
    public static Charset charset(File file, String defaultCharset) throws IOException {
        Charset detected = charset(file);
        return detected != null ? detected : Charset.forName(defaultCharset);
    }

    /**
     * Detects the charset of {@code file}.
     *
     * <p>An empty file decodes cleanly under the first candidate and is therefore
     * reported as UTF-8.
     *
     * @param file file to inspect
     * @return the detected charset, or {@code null} if there is no BOM and the
     *         content is valid in neither UTF-8 nor GBK
     * @throws IOException if the file cannot be read
     */
    public static Charset charset(File file) throws IOException {
        Charset fromBom = encodeByBom(file);
        if (fromBom != null) {
            return fromBom;
        }
        // Order matters: plain ASCII is valid in both, and UTF-8 is the stricter check.
        // More candidates (e.g. "GB18030") can be appended here if needed.
        return charsetDetect(file, "UTF-8", "GBK");
    }

    /** Returns the first of {@code charsets} under which the whole file decodes, or {@code null}. */
    private static Charset charsetDetect(File file, String... charsets) throws IOException {
        if (charsets.length == 0) {
            throw new IllegalArgumentException("charsets must set");
        }
        for (String name : charsets) {
            Charset candidate = Charset.forName(name);
            if (decodesCleanly(file, candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Streams the complete file through a strict decoder for {@code candidate}.
     *
     * <p>A decoder obtained from {@code newDecoder()} reports malformed and
     * unmappable input instead of replacing it, and the reader carries decoder
     * state across buffer refills, so a multi-byte character split between two
     * reads is handled correctly.
     *
     * @return {@code true} only if every byte of the file is valid in {@code candidate}
     */
    private static boolean decodesCleanly(File file, Charset candidate) throws IOException {
        CharsetDecoder strict = candidate.newDecoder();
        try (InputStream bytes = new FileInputStream(file);
             Reader reader = new InputStreamReader(bytes, strict)) {
            char[] sink = new char[BUFFER_SIZE];
            while (reader.read(sink) != -1) {
                // Decoded text is discarded; only the absence of decoding errors matters.
            }
            return true;
        } catch (CharacterCodingException notThisCharset) {
            return false;
        }
    }

    /**
     * Inspects the leading bytes of the file for a byte-order mark.
     *
     * @return UTF-8 for {@code EF BB BF}, UTF-16 for {@code FF FE}, UTF-16BE for
     *         {@code FE FF}, otherwise {@code null}
     */
    private static Charset encodeByBom(File file) throws IOException {
        byte[] head = new byte[3];
        int count;
        try (InputStream input = new FileInputStream(file)) {
            count = input.readNBytes(head, 0, head.length);
        }
        if (count < 2) {
            // Too short to carry any of the BOMs checked below.
            return null;
        }
        int first = head[0] & 0xff;
        int second = head[1] & 0xff;
        // All three bytes are required: EF BB on its own is also a valid GBK character.
        if (count == 3 && first == 0xEF && second == 0xBB && (head[2] & 0xff) == 0xBF) {
            return Charset.forName("UTF-8");
        }
        if (first == 0xFF && second == 0xFE) {
            // The UTF-16 decoder reads the BOM itself and picks little-endian order.
            return Charset.forName("UTF-16");
        }
        if (first == 0xFE && second == 0xFF) {
            return Charset.forName("UTF-16BE");
        }
        return null;
    }
}