Java 自动识别文件编码
引入依赖
<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j</artifactId>
<version>58.1</version>
</dependency>
代码如下
/**
 * Utility for guessing the character encoding of a file with ICU4J's
 * {@code CharsetDetector}.
 */
public class EncodingUtils {
    private static final Logger LOGGER = LoggerFactory.getLogger(EncodingUtils.class);

    /** Encoding returned whenever detection cannot produce a result. */
    private static final String DEFAULT_ENCODING = "GBK";

    /**
     * Detects the character encoding of the given file.
     *
     * <p>The whole file is read into memory and handed to a {@code CharsetDetector};
     * the name of the best match is returned. This method is best-effort: it never
     * throws for a missing/unreadable file or an undetectable encoding, and instead
     * falls back to {@code "GBK"}.
     *
     * @param file the file to inspect; {@code null} yields the default encoding
     * @return the detected charset name, or {@code "GBK"} if the file is {@code null},
     *         cannot be read, or no charset matches its content
     */
    public static String autoDiscernEncoding(File file) {
        if (file == null) {
            // Keep the best-effort contract instead of failing with a NullPointerException.
            LOGGER.error("自动识别编码失败: 文件为null");
            return DEFAULT_ENCODING;
        }
        try {
            byte[] data = Files.readAllBytes(file.toPath());

            CharsetDetector detector = new CharsetDetector();
            detector.setText(data);

            // detect() returns null when no charset matches the input, so the
            // result must be checked before it is dereferenced.
            CharsetMatch match = detector.detect();
            if (match == null) {
                LOGGER.warn("文件【{}】无法识别编码, 使用默认编码:【{}】", file.getName(), DEFAULT_ENCODING);
                return DEFAULT_ENCODING;
            }

            String encoding = match.getName();
            LOGGER.info("文件【{}】的编码为:【{}】", file.getName(), encoding);
            return encoding;
        } catch (IOException e) {
            LOGGER.error("自动识别编码失败", e);
            return DEFAULT_ENCODING;
        }
    }
}