Apache Tika — Maven 依赖
<!-- Text parser module; provides the org.apache.tika.parser.txt package used for charset detection. -->
<dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-parser-text-module</artifactId>
    <version>2.9.1</version>
</dependency>
<!-- Tika core library. -->
<dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-core</artifactId>
    <version>2.9.1</version>
</dependency>
<!-- Bundle of the standard Tika parsers. -->
<!-- NOTE(review): this bundle presumably already pulls in tika-core and the text parser module
     transitively; confirm with `mvn dependency:tree` before trimming the two entries above. -->
<dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-parsers-standard-package</artifactId>
    <version>2.9.1</version>
</dependency>
检测文件的字符编码
import org.apache.tika.parser.txt.CharsetDetector;
import org.apache.tika.parser.txt.CharsetMatch;
// Charset detection works on the undecoded bytes of the file, so the whole
// file is loaded into memory first.
// NOTE(review): FileUtils is presumably org.apache.commons.io.FileUtils; its
// import and the declaration of `file` are not shown in this snippet.
CharsetDetector detector = new CharsetDetector();
byte[] rawBytes = FileUtils.readFileToByteArray(file);
detector.setText(rawBytes);
// Single best guess. Per the CharsetDetector API docs this is the
// highest-confidence match and may be null when nothing matches.
CharsetMatch charsetMatch = detector.detect();
// Every plausible candidate; per the API docs, ordered best match first.
CharsetMatch[] matches = detector.detectAll();
可以根据实际情况遍历所有匹配结果,找到你需要的字符集:
CharsetMatch[] matches = detector.detectAll();