import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import org.mozilla.intl.chardet.HtmlCharsetDetector;
import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
import org.mozilla.intl.chardet.nsPSMDetector;
/**
 * Command-line utility that copies a directory tree while re-encoding every
 * regular file into a target character encoding (UTF-8 in {@link #main}).
 *
 * <p>The source encoding of each file is guessed with
 * {@code FileCharsetDetector}; a few detector results are remapped before
 * reading (see {@link #ChangeTask}).
 *
 * @author springmvc2006@sina.com
 */
public class ChangeFile2UTF8 {

    /** Encoding used when the detector result is missing or remapped as unreliable. */
    private static final String FALLBACK_ENCODING = "gbk";

    /**
     * Converts everything below the hard-coded input directory and writes the
     * result to the hard-coded output directory.
     *
     * @param args ignored
     * @throws Exception if any file cannot be read, detected or written
     */
    public static void main(String[] args) throws Exception {
        String newEncoding = "utf-8"; // target encoding of the converted files
        File oldFileDir = new File("D:/task/input");
        String newFileDir = "D:/task/output";
        ChangeTask(oldFileDir, newFileDir, newEncoding);
        System.out.println("转码为:"+newEncoding+"...成功");
    }

    /**
     * Recursively walks {@code oldFileDir} and converts each regular file into
     * {@code newFileDir}, mirroring sub-directory names.
     *
     * <p>Files whose name contains {@code ".jar"} are only printed and skipped.
     * Nothing happens when {@code oldFileDir} is not a directory.
     *
     * @param oldFileDir  directory to read from
     * @param newFileDir  path of the directory to write to
     * @param newEncoding name of the encoding used for the written files
     * @throws Exception if the directory cannot be listed or a file cannot be
     *                   detected, read or written
     */
    public static void ChangeTask(File oldFileDir, String newFileDir, String newEncoding)throws Exception{
        if (!oldFileDir.isDirectory()) {
            return;
        }
        File[] oldFile = oldFileDir.listFiles();
        if (oldFile == null) {
            // listFiles() yields null instead of throwing when the listing fails.
            throw new IOException("Unable to list directory: " + oldFileDir.getAbsolutePath());
        }
        for (int i = 0; i < oldFile.length; i++) {
            File entry = oldFile[i];
            if (!entry.isFile()) {
                ChangeTask(entry, newFileDir + "/" + entry.getName(), newEncoding);
                continue;
            }
            if (entry.getName().indexOf(".jar") != -1) {
                // jar files must not be re-encoded
                System.out.println(entry.getName());
                continue;
            }
            String encodingOld = new FileCharsetDetector().guestFileEncoding(entry, 2);
            System.out.println(entry.getAbsolutePath() + "....." + encodingOld);
            saveFile2OtherEncoding(entry, newFileDir, normalizeEncoding(encodingOld), newEncoding);
        }
    }

    /**
     * Maps a detector result to the encoding name actually used for reading.
     *
     * <p>The name is searched for the markers "windows", "big" and "nomatch"
     * (case-insensitively); a {@code null} result is treated like "nomatch".
     *
     * @param detected encoding name reported by the detector, may be {@code null}
     * @return the encoding name to read the file with, never {@code null}
     */
    private static String normalizeEncoding(String detected) {
        if (detected == null) {
            return FALLBACK_ENCODING;
        }
        String lower = detected.toLowerCase(java.util.Locale.ENGLISH);
        if (lower.indexOf("windows") != -1) {
            // NOTE(review): "unicode" is the mapping the original code intended for
            // windows-* results; presumably a UTF-16 alias in the JRE - confirm.
            return "unicode";
        }
        if (lower.indexOf("big") != -1 || lower.indexOf("nomatch") != -1) {
            return FALLBACK_ENCODING;
        }
        return detected;
    }

    /**
     * Re-encodes one file line by line.
     *
     * <p>The output file gets the same name as {@code oldFile} inside
     * {@code newFileDir} (created if missing) and overwrites any existing file.
     * Empty lines are dropped and every written line ends with {@code "\r\n"}.
     *
     * @param oldFile     file to read
     * @param newFileDir  path of the directory that receives the converted file
     * @param oldEncoding name of the encoding used to decode {@code oldFile}
     * @param newEncoding name of the encoding used to encode the output
     * @throws Exception if a stream cannot be opened, an encoding name is not
     *                   supported, or reading/writing fails
     */
    public static void saveFile2OtherEncoding(File oldFile, String newFileDir,
            String oldEncoding, String newEncoding)throws Exception {
        FileInputStream fileInputStream = new FileInputStream(oldFile);
        try {
            BufferedReader bufferRead = new BufferedReader(
                    new InputStreamReader(fileInputStream, oldEncoding));
            createFileName(newFileDir);
            File copyFile = new File(newFileDir, oldFile.getName());
            FileOutputStream fileOutputStream = new FileOutputStream(copyFile, false);
            try {
                BufferedWriter newFileBW = new BufferedWriter(
                        new OutputStreamWriter(fileOutputStream, newEncoding));
                String line;
                while ((line = bufferRead.readLine()) != null) {
                    if (line.equals("")) {
                        continue;
                    }
                    newFileBW.write(line + "\r\n");
                }
                // Closing the writer flushes buffered characters and encoder state.
                newFileBW.close();
            } finally {
                // Releases the file handle even when a wrapper could not be built
                // or the copy failed; a second close after success is harmless.
                fileOutputStream.close();
            }
        } finally {
            fileInputStream.close();
        }
    }

    /**
     * Creates the directory {@code newFileDir}, including missing parents,
     * when it does not exist yet.
     *
     * @param newFileDir path of the directory to create
     */
    public static void createFileName(String newFileDir) {
        File newFile = new File(newFileDir);
        if (!newFile.exists()) {
            newFile.mkdirs();
        }
    }
}
/**
 * Guesses the character encoding of a file with the jchardet
 * {@code nsDetector}.
 *
 * <p>Each detection stores its result in instance fields without any
 * synchronization, so one instance must not be used by several threads at once.
 */
class FileCharsetDetector {
    /** Set once the detector observer reports a match or the file is pure ASCII. */
    private boolean found = false;
    /**
     * Name of the detected charset for the current detection; stays
     * {@code null} until a charset has been determined.
     */
    private String encoding = null;

    /**
     * Manual smoke test: prints the encoding detected for one hard-coded file,
     * once without and once with the Chinese language hint.
     *
     * @param argv ignored
     * @throws Exception if the file cannot be read
     */
    public static void mains(String[] argv) throws Exception {
        String encoding = new FileCharsetDetector().guestFileEncoding("D:/task/input/GetAllStlrsDocAction.java");
        String encodingTwo = new FileCharsetDetector().guestFileEncoding(new File("D:/task/input/GetAllStlrsDocAction.java"), 2);
        System.out.println(encoding);
        System.out.println(encodingTwo);
    }

    /**
     * Detects the encoding of a file using a detector built without a
     * language hint.
     *
     * @param file file to inspect
     * @return the encoding name, or {@code null} when none could be determined
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    public String guestFileEncoding(File file) throws FileNotFoundException,
            IOException {
        return detectEncoding(file, new nsDetector());
    }

    /**
     * Detects the encoding of a file using a language hint.
     *
     * @param file         file to inspect
     * @param languageHint language hint code, e.g. 1: Japanese; 2: Chinese;
     *                     3: Simplified Chinese; 4: Traditional Chinese;
     *                     5: Korean; 6: don't know (default)
     * @return the encoding name (e.g. UTF-8, GBK, GB2312), or {@code null}
     *         when none could be determined
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    public String guestFileEncoding(File file, int languageHint)
            throws FileNotFoundException, IOException {
        return detectEncoding(file, new nsDetector(languageHint));
    }

    /**
     * Detects the encoding of the file at {@code path} using a detector built
     * without a language hint.
     *
     * @param path path of the file to inspect
     * @return the encoding name (e.g. UTF-8, GBK, GB2312), or {@code null}
     *         when none could be determined
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    public String guestFileEncoding(String path) throws FileNotFoundException,
            IOException {
        return guestFileEncoding(new File(path));
    }

    /**
     * Detects the encoding of the file at {@code path} using a language hint.
     *
     * @param path         path of the file to inspect
     * @param languageHint language hint code, e.g. 1: Japanese; 2: Chinese;
     *                     3: Simplified Chinese; 4: Traditional Chinese;
     *                     5: Korean; 6: don't know (default)
     * @return the encoding name, or {@code null} when none could be determined
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    public String guestFileEncoding(String path, int languageHint)
            throws FileNotFoundException, IOException {
        return guestFileEncoding(new File(path), languageHint);
    }

    /**
     * Feeds the file to {@code det} in 1 KiB chunks and returns the result.
     *
     * <p>A file for which every chunk passes the detector's ASCII check
     * (including an empty file) is reported as {@code "ASCII"}. When the
     * detector never notifies a match, its first probable charset is returned.
     *
     * @param file file to inspect
     * @param det  detector to run; it is initialised by this method
     * @return the encoding name, or {@code null} when the detector offers no
     *         probable charset
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    private String detectEncoding(File file, nsDetector det)
            throws FileNotFoundException, IOException {
        // Start clean so a reused instance never reports a previous file's result.
        found = false;
        encoding = null;

        // The observer's Notify() is invoked when a matching charset is found.
        det.Init(new nsICharsetDetectionObserver() {
            public void Notify(String charset) {
                found = true;
                encoding = charset;
            }
        });

        boolean isAscii = true;
        BufferedInputStream imp = new BufferedInputStream(new FileInputStream(file));
        try {
            byte[] buf = new byte[1024];
            int len;
            boolean done = false;
            // Once the detector is done no later chunk is inspected, so stop reading.
            while (!done && (len = imp.read(buf, 0, buf.length)) != -1) {
                // Keep checking for pure ASCII until the first non-ASCII chunk.
                if (isAscii) {
                    isAscii = det.isAscii(buf, len);
                }
                // Run the detector on non-ASCII data.
                if (!isAscii) {
                    done = det.DoIt(buf, len, false);
                }
            }
        } finally {
            imp.close();
        }
        det.DataEnd();

        if (isAscii) {
            encoding = "ASCII";
            found = true;
        }
        if (!found) {
            String[] prob = det.getProbableCharsets();
            if (prob.length == 0) {
                return null;
            }
            // Nothing matched exactly: fall back to the first probable charset.
            encoding = prob[0];
        }
        return encoding;
    }
}