java jchardet_借助JCharDet获取文件字符集

package com.zhyea.util;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;

import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;

/**
 * Detects the character set of a file with the help of JCharDet.
 *
 * <p>Every call creates its own detector and observer, so the result of one
 * detection can never leak into another one.
 *
 * @author robin
 */
public class FileCharsetDetector {

    /** Name returned when every byte of the file is 7-bit ASCII. */
    private static final String ASCII = "ASCII";

    /**
     * Placeholder that jchardet's {@code getProbableCharsets()} is understood to
     * return when it has no candidate at all.
     * NOTE(review): confirm against the bundled jchardet version.
     */
    private static final String NO_MATCH = "nomatch";

    /** Size, in bytes, of the chunks fed to the detector. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Language hints that can be handed to the detector.
     *
     * @author robin
     */
    enum Language {

        Japanese(1),

        Chinese(2),

        SimplifiedChinese(3),

        TraditionalChinese(4),

        Korean(5),

        DontKnow(6);

        /** Numeric hint passed to the {@code nsDetector} constructor. */
        private int hint;

        Language(int hint) {
            this.hint = hint;
        }

        /**
         * @return the numeric hint passed to the {@code nsDetector} constructor
         */
        public int getHint() {
            return this.hint;
        }
    }

    /**
     * Collects the charset reported by a detector for one single detection run.
     */
    private static final class CharsetCollector implements nsICharsetDetectionObserver {

        /** Charset reported by the detector, or {@code null} if none was reported. */
        private String charset;

        public void Notify(String charset) {
            this.charset = charset;
        }

        String getCharset() {
            return charset;
        }
    }

    /**
     * Detects the encoding of the given file without a language hint.
     *
     * @param file
     *            the file to inspect
     * @return the encoding name, or {@code null} if none could be determined
     * @throws FileNotFoundException
     *             if the file cannot be opened
     * @throws IOException
     *             if reading the file fails
     */
    public static String checkEncoding(File file) throws FileNotFoundException,
            IOException {
        return checkEncoding(file, new nsDetector());
    }

    /**
     * Detects the encoding of the given file using a language hint.
     *
     * @param file
     *            the file to inspect
     * @param lang
     *            the language hint handed to the detector
     * @return the encoding name, or {@code null} if none could be determined
     * @throws FileNotFoundException
     *             if the file cannot be opened
     * @throws IOException
     *             if reading the file fails
     */
    public static String checkEncoding(File file, Language lang)
            throws FileNotFoundException, IOException {
        return checkEncoding(file, new nsDetector(lang.getHint()));
    }

    /**
     * Detects the encoding of the file at the given path without a language hint.
     *
     * @param path
     *            path of the file to inspect
     * @return the encoding name (e.g. UTF-8, GBK, GB2312), or {@code null} if
     *         none could be determined
     * @throws FileNotFoundException
     *             if the file cannot be opened
     * @throws IOException
     *             if reading the file fails
     */
    public static String checkEncoding(String path) throws FileNotFoundException,
            IOException {
        return checkEncoding(new File(path));
    }

    /**
     * Detects the encoding of the file at the given path using a language hint.
     *
     * @param path
     *            path of the file to inspect
     * @param lang
     *            the language hint handed to the detector
     * @return the encoding name, or {@code null} if none could be determined
     * @throws FileNotFoundException
     *             if the file cannot be opened
     * @throws IOException
     *             if reading the file fails
     */
    public static String checkEncoding(String path, Language lang)
            throws FileNotFoundException, IOException {
        return checkEncoding(new File(path), lang);
    }

    /**
     * Runs one detection over the file with the supplied detector.
     *
     * <p>The result is resolved in this order: "ASCII" when the whole file is
     * 7-bit ASCII, otherwise the charset the detector reported to its observer,
     * otherwise the first probable charset, otherwise {@code null}.
     *
     * @param file
     *            the file to inspect
     * @param detector
     *            a detector that has not been used for another file
     * @return the encoding name, or {@code null} if none could be determined
     * @throws FileNotFoundException
     *             if the file cannot be opened
     * @throws IOException
     *             if reading the file fails
     */
    private static String checkEncoding(File file, nsDetector detector)
            throws FileNotFoundException, IOException {
        // A per-call observer keeps the outcome local to this detection; the
        // previous static "found"/"encoding" fields made later calls return
        // the result of an earlier file.
        CharsetCollector collector = new CharsetCollector();
        detector.Init(collector);

        if (isAscii(file, detector)) {
            return ASCII;
        }

        String reported = collector.getCharset();
        if (reported != null) {
            return reported;
        }

        // The detector made no definite decision: fall back to its best guess.
        String[] candidates = detector.getProbableCharsets();
        if (candidates == null || candidates.length == 0
                || NO_MATCH.equals(candidates[0])) {
            return null;
        }
        return candidates[0];
    }

    /**
     * Reads the file, checking whether it is pure ASCII and feeding the
     * detector as soon as a non-ASCII chunk is seen.
     *
     * @param file
     *            the file whose encoding is being checked
     * @param detector
     *            the detector that receives the non-ASCII data
     * @return {@code true} if every byte read was ASCII (also for an empty file)
     * @throws IOException
     *             if the file cannot be opened or read
     */
    private static boolean isAscii(File file, nsDetector detector) throws IOException {
        BufferedInputStream input = new BufferedInputStream(new FileInputStream(file));
        try {
            try {
                byte[] buffer = new byte[BUFFER_SIZE];
                boolean ascii = true;
                boolean done = false;
                int read;
                while ((read = input.read(buffer)) != -1) {
                    if (ascii) {
                        ascii = detector.isAscii(buffer, read);
                    }
                    if (!ascii && !done) {
                        done = detector.DoIt(buffer, read, false);
                    }
                    if (done) {
                        // The detector has made up its mind; the rest of the
                        // file cannot change the outcome.
                        break;
                    }
                }
                return ascii;
            } finally {
                // Tell the detector that no more data will follow.
                detector.DataEnd();
            }
        } finally {
            // Closed in its own finally so a failure in DataEnd() cannot leak
            // the stream.
            input.close();
        }
    }

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值