import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;

import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;

public classFileCharsetDetector {private boolean found = false;private String encoding = null;public static void main(String[] argv) throwsException {
File file1= new File("C:\\test1.txt");
System.out.println("文件编码:" + newFileCharsetDetector().guessFileEncoding(file1));
}/*** 传入一个文件(File)对象,检查文件编码
*
*@paramfile
* File对象实例
*@return文件编码,若无,则返回null
*@throwsFileNotFoundException
*@throwsIOException*/
public String guessFileEncoding(File file) throwsFileNotFoundException, IOException {return guessFileEncoding(file, newnsDetector());
}/***
* 获取文件的编码
*@paramfile
* File对象实例
*@paramlanguageHint
* 语言提示区域代码@see#nsPSMDetector ,取值如下:
* 1 : Japanese
* 2 : Chinese
* 3 : Simplified Chinese
* 4 : Traditional Chinese
* 5 : Korean
* 6 : Dont know(default)
*
*
*@return文件编码,eg:UTF-8,GBK,GB2312形式(不确定的时候,返回可能的字符编码序列);若无,则返回null
*@throwsFileNotFoundException
*@throwsIOException*/
public String guessFileEncoding(File file, int languageHint) throwsFileNotFoundException, IOException {return guessFileEncoding(file, newnsDetector(languageHint));
}/*** 获取文件的编码
*
*@paramfile
*@paramdet
*@return*@throwsFileNotFoundException
*@throwsIOException*/
private String guessFileEncoding(File file, nsDetector det) throwsFileNotFoundException, IOException {//Set an observer...//The Notify() will be called when a matching charset is found.
det.Init(newnsICharsetDetectionObserver() {public voidNotify(String charset) {
encoding=charset;
found= true;
}
});
BufferedInputStream imp= new BufferedInputStream(newFileInputStream(file));byte[] buf = new byte[1024];intlen;boolean done = false;boolean isAscii = false;while ((len = imp.read(buf, 0, buf.length)) != -1) {//Check if the stream is only ascii.
isAscii =det.isAscii(buf, len);if(isAscii) {break;
}//DoIt if non-ascii and not done yet.
done = det.DoIt(buf, len, false);if(done) {break;
}
}
imp.close();
det.DataEnd();if(isAscii) {
encoding= "ASCII";
found= true;
}if (!found) {
String[] prob=det.getProbableCharsets();//这里将可能的字符集组合起来返回
for (int i = 0; i < prob.length; i++) {if (i == 0) {
encoding=prob[i];
}else{
encoding+= "," +prob[i];
}
}if (prob.length > 0) {//在没有发现情况下,也可以只取第一个可能的编码,这里返回的是一个可能的序列
returnencoding;
}else{return null;
}
}returnencoding;
}
}