<!-- 读取-->
<!-- word-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.0</version>
</dependency>
<!-- pdf-->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.22</version>
</dependency>
<!-- ppt-->
<dependency>
<groupId>e-iceblue</groupId>
<artifactId>spire.presentation.free</artifactId>
<version>5.1.0</version>
</dependency>
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import com.spire.presentation.*;
import java.io.*;
//根据文件类型调用适当的处理方法
switch (fileTypeName(filePath)) {
case "doc","docx":
return getTextFromWord(filePath);
case "pdf":
return getTextFromPdf(filePath);
case "ppt","pptx":
return getTextFromPpt(filePath);
default:
throw new RuntimeException("不支持的文件格式,文件解析目前只支持(DOC/DOCX/PDF/PPT/PPTX)");
}