今天遇到一个要探测文件的字符集编码问题,查了一下资料,发现了一个 探测器: jchardet 需要 chardet.jar
例子如下:
**
* 获取文件的编码
*
* @param file
* @param det
* @return
* @throws FileNotFoundException
* @throws IOException
*
private String geestFileEncoding(File file, nsDetector det)
throws FileNotFoundException, IOException {
det.Init(new nsICharsetDetectionObserver() {
public void Notify(String charset) {
found = true;
encoding = charset;
}
});
BufferedInputStream imp = new BufferedInputStream(new FileInputStream(
file));
byte[] buf = new byte[1024];
int len;
boolean done = false;
boolean isAscii = true;
while ((len = imp.read(buf, 0, buf.length)) != -1) {
if (isAscii)
isAscii = det.isAscii(buf, len);
if (!isAscii && !done)
done = det.DoIt(buf, len, false);
}
det.DataEnd();
if (isAscii) {
encoding = "ASCII";
found = true;
}
if (!found) {
String prob[] = det.getProbableCharsets();
if (prob.length > 0) {
encoding = prob[0];
} else {
return null;
}
}
return encoding;
}