/**
 * Opens {@code bsFile} as a character stream decoded with the encoding name
 * returned by {@code CpdetectorUtil.perceiveCode(bsFile)}.
 *
 * <p>Up to four leading bytes are read and handed to {@code ishasBOM}; its
 * result is used as the number of trailing bytes (of those just read) to push
 * back onto the stream, so the bytes it excludes are skipped.
 *
 * <p>The caller owns the returned reader and is responsible for closing it.
 * If anything fails after the file has been opened, the underlying stream is
 * closed here before returning.
 *
 * @param bsFile the file to read
 * @return a buffered reader over the file, or {@code null} when an
 *         {@link IOException} occurred (the exception is logged)
 */
public static Reader readTranslateFile(File bsFile) {
    final int BOM_SIZE = 4;
    byte[] bom = new byte[BOM_SIZE];
    BufferedReader reader = null;
    PushbackInputStream pushbackInputStream = null;
    try {
        // Detect the encoding first; may be null when nothing was detected.
        String sourceEncoding = CpdetectorUtil.perceiveCode(bsFile);

        // Read the candidate BOM bytes through a pushback stream so that
        // non-BOM bytes can be returned to the stream afterwards.
        pushbackInputStream = new PushbackInputStream(new FileInputStream(
                bsFile), BOM_SIZE);
        int n = pushbackInputStream.read(bom, 0, bom.length);

        // read() returns -1 on an empty file; a negative count must not be
        // passed on to unread(), so only inspect the bytes when some were read.
        if (n > 0) {
            int unread = ishasBOM(bom, n, n);
            // Push back the last `unread` bytes of the n bytes just read.
            pushbackInputStream.unread(bom, (n - unread), unread);
        }

        // InputStreamReader rejects a null charset name, so fall back to the
        // platform default charset when no encoding was detected.
        InputStreamReader decoder = (sourceEncoding == null)
                ? new InputStreamReader(pushbackInputStream)
                : new InputStreamReader(pushbackInputStream, sourceEncoding);
        reader = new BufferedReader(decoder);
    } catch (IOException e) {
        // Also covers MalformedURLException and UnsupportedEncodingException,
        // which are IOException subclasses.
        Logger.getLogger().error(FileUtil.class, e);
    } finally {
        // No reader was produced: release the file handle instead of leaking it.
        if (reader == null && pushbackInputStream != null) {
            try {
                pushbackInputStream.close();
            } catch (IOException e) {
                Logger.getLogger().error(FileUtil.class, e);
            }
        }
    }
    return reader;
}
// Shared detector proxy used by perceiveCode; obtained once via getInstance().
private static final CodepageDetectorProxy detector = CodepageDetectorProxy
.getInstance();
// Registers the individual detectors on the proxy during class initialization.
// NOTE(review): the registration order presumably determines the order in
// which the detectors are consulted -- confirm against the cpdetector
// documentation before reordering these lines.
static {
// NOTE(review): the boolean argument is assumed to be a verbosity switch -- confirm.
detector.add(new ParsingDetector(false));
detector.add(JChardetFacade.getInstance());
detector.add(UnicodeDetector.getInstance());
detector.add(ASCIIDetector.getInstance());
}
// Private, empty constructor: prevents instantiation from outside this class.
private CpdetectorUtil() {
}
/**
 * Detects the character encoding of a file using the shared detector.
 *
 * @param file the file to probe
 * @return the name of the detected charset, or {@code null} when
 *         {@code file} is {@code null}, does not exist, or the detector
 *         returns no charset
 * @throws IOException propagated from converting the file to a URL or from
 *         the detector
 */
public static String perceiveCode(File file) throws IOException {
    boolean probeable = file != null && file.exists();
    if (!probeable) {
        return null;
    }
    Charset detected = detector.detectCodepage(file.toURI().toURL());
    return detected == null ? null : detected.name();
}
// Required libraries (must be on the classpath):
//   cpdetector_1.0.10.jar
//   chardet-1.0.jar
//   antlr-2.7.4.jar