1.添加jar包
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.0</version>
</dependency>
2.读取所有内容(非表格)
/**
 * Reads the complete plain text of a Word document.
 *
 * <p>Files ending in {@code .doc} are read with {@link WordExtractor}; files ending in
 * {@code .docx} are read with {@link XWPFWordExtractor}. The extension check ignores case.
 * For {@code .doc} files a diagnostic dump (full text, footer, metadata, paragraphs,
 * endnotes) is additionally written to standard output.
 *
 * <p>All resources opened here are released before the method returns, including when
 * extraction fails.
 *
 * @param path file-system path of the document; {@code null} is treated as "not a Word file"
 * @return the extracted text, or an empty string when the path is not a Word file or when
 *         reading fails (the failure's stack trace is printed to standard error)
 */
public static String readDoc(String path) {
    String result = "";
    if (path == null) {
        System.out.println("此文件不是word文件");
        return result;
    }
    try {
        if (hasExtension(path, ".doc")) {
            // try-with-resources closes the extractor first, then the stream, even on failure.
            try (InputStream is = new FileInputStream(new File(path));
                 WordExtractor extractor = new WordExtractor(is)) {
                result = extractor.getText();
                printDocDetails(extractor, result);
            }
        } else if (hasExtension(path, ".docx")) {
            OPCPackage opcPackage = POIXMLDocument.openPackage(path);
            POIXMLTextExtractor extractor;
            try {
                extractor = new XWPFWordExtractor(opcPackage);
            } catch (Exception e) {
                // No extractor owns the package yet: release the file handle without saving.
                opcPackage.revert();
                throw e;
            }
            try {
                result = extractor.getText();
            } finally {
                // Closing the extractor also releases the underlying package.
                extractor.close();
            }
        } else {
            System.out.println("此文件不是word文件");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}

/** Returns whether {@code path} ends with {@code extension}, ignoring case. */
private static boolean hasExtension(String path, String extension) {
    // A negative offset (path shorter than extension) makes regionMatches return false.
    return path.regionMatches(true, path.length() - extension.length(),
            extension, 0, extension.length());
}

/**
 * Prints a diagnostic dump of a legacy {@code .doc} document to standard output.
 *
 * @param extractor open extractor for the document
 * @param text      full document text already obtained from {@code extractor.getText()}
 */
@SuppressWarnings("deprecation") // getFooterText() is deprecated in POI but still available
private static void printDocDetails(WordExtractor extractor, String text) {
    // Full document text (reuses the already extracted value instead of extracting again).
    System.out.println(text);
    System.out.println("=================1=================");
    System.out.println("==================2================");
    // Footer text. The previous code printed the document object itself here.
    System.out.println("页脚:" + extractor.getFooterText());
    // Document metadata as text.
    System.out.println(extractor.getMetadataTextExtractor().getText());
    System.out.println("===============5===================");
    // Text of each paragraph, numbered from 1.
    String[] paraTexts = extractor.getParagraphText();
    for (int i = 0; i < paraTexts.length; i++) {
        System.out.println("Paragraph " + (i + 1) + " : " + paraTexts[i]);
    }
    // Text as returned by getTextFromPieces().
    System.out.println(extractor.getTextFromPieces());
    System.out.println("=============6=====================");
    // NOTE(review): this prints the metadata extractor object itself (its toString()),
    // not its text; kept as in the original output.
    System.out.println(extractor.getMetadataTextExtractor());
    System.out.println("===============7===================");
    // Endnote text.
    System.out.println(extractor.getEndnoteText());
    System.out.println("===============8===================");
}
3.读取表格内容
以下代码同时读取段落内容和表格内容(使用 XWPFDocument,仅适用于 .docx 文件)
/**
 * Prints the paragraph text and the table cell text of a {@code .docx} document to
 * standard output.
 *
 * <p>Paragraphs returned by {@code doc.getParagraphs()} are printed first, one per line.
 * Then every table returned by {@code doc.getTables()} is walked row by row and each
 * cell's text is printed on its own line.
 *
 * <p>The input stream and the document are closed before the method returns, including
 * when reading fails. Any failure is reported by printing its stack trace to standard
 * error; nothing is rethrown.
 *
 * @param path file-system path of the document to read
 */
public static void readTableData(String path) {
    // try-with-resources closes the document first, then the stream, even on failure.
    try (InputStream is = new FileInputStream(path);
         XWPFDocument doc = new XWPFDocument(is)) {
        for (XWPFParagraph para : doc.getParagraphs()) {
            System.out.println(para.getText());
        }
        // Walk every table -> row -> cell and print the cell text.
        for (XWPFTable table : doc.getTables()) {
            for (XWPFTableRow row : table.getRows()) {
                for (XWPFTableCell cell : row.getTableCells()) {
                    System.out.println(cell.getText());
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
参考文章:java poi word 表格_java 使用POI 读写word 表格 https://blog.csdn.net/weixin_33045961/article/details/114433011