1. 在 pom.xml 中引入以下依赖,版本尽量与下面保持一致;poi-ooxml 为可选依赖,可以引入,也可以不引入
<!-- Apache Tika: the core artifact plus the parsers artifact, both pinned to the same version -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>1.24.1</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>1.24.1</version>
</dependency>
<!-- Common I/O utility classes (commons-io) -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
<!-- Apache POI OOXML utilities; the note above describes this dependency as optional -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
2.工具类
package com.imooc;
import java.io.*;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
 * Utility for extracting the plain-text content of a document with Apache Tika.
 *
 * <p>Formats tested so far: properties, bat, docx, pdf, txt, html, xlsx, pptx, ppt.
 * Other formats have not been tested yet.
 */
public class TikaUtil {

    /**
     * Extracts the body text of the given file.
     *
     * <p>Extraction is best-effort: if parsing fails part-way through, the failure is
     * reported on standard error and whatever text was collected before the failure is
     * returned. No checked exception is propagated to the caller.
     *
     * @param file the file to read; may be {@code null}
     * @return the extracted text, or {@code null} when {@code file} is {@code null}, does
     *         not exist, is not a regular file (for example a directory) or cannot be opened
     */
    public static String getBody(File file) {
        // isFile() is false for missing paths and for directories alike, so neither
        // reaches the parser.
        if (file == null || !file.isFile()) {
            return null;
        }

        final InputStream opened;
        try {
            opened = new FileInputStream(file);
        } catch (FileNotFoundException e) {
            // The file exists but cannot be opened (e.g. insufficient permissions).
            // Handing a null stream to the parser would only fail later and less clearly.
            System.err.println("TikaUtil: cannot open " + file + ": " + e.getMessage());
            return null;
        }

        Parser parser = new AutoDetectParser();
        Metadata meta = new Metadata();
        // -1 disables the write limit. The no-arg constructor caps the buffer at
        // 100,000 characters and aborts the parse with a SAXException beyond that,
        // which silently truncated large documents.
        ContentHandler handler = new BodyContentHandler(-1);

        // try-with-resources guarantees the stream is closed even when parsing fails.
        try (InputStream input = opened) {
            parser.parse(input, handler, meta, new ParseContext());
        } catch (IOException | SAXException | TikaException e) {
            // Best-effort: report the problem, then fall through and return the partial text.
            System.err.println("TikaUtil: failed to parse " + file + ": " + e);
            e.printStackTrace();
        }
        return handler.toString();
    }

    /**
     * Small manual demo: prints the extracted text of one file to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the file to read. Without an
     *             argument the built-in placeholder path is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : "文件路径";
        String body = null;
        try {
            body = getBody(new File(path));
        } catch (RuntimeException e) {
            // getBody declares no checked exceptions; this only guards against
            // unexpected runtime failures inside a parser so the demo still prints.
            e.printStackTrace();
        }
        System.out.println(body);
    }
}