/**
 * Opens a Word (.docx) file and prints its body content in document order:
 * paragraphs via {@code getParagraphText} and tables via {@code getTableText}.
 * The original author tested this against POI 4.1.2.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if the file cannot be opened or read
 */
public static void main(String[] args) throws IOException {
    // Hard-coded sample location; point this at a local .docx file before running.
    String filePath = "C:\\Users\\W10\\Downloads\\xxx.docx";
    // try-with-resources releases both the file stream and the document,
    // including when parsing fails part-way through.
    try (java.io.InputStream in = Files.newInputStream(Paths.get(filePath));
         XWPFDocument document = new XWPFDocument(in)) {
        // Body elements come back as a mix of paragraphs and tables.
        List<IBodyElement> elements = document.getBodyElements();
        for (IBodyElement element : elements) {
            if (element instanceof XWPFParagraph) {
                getParagraphText((XWPFParagraph) element);
            } else if (element instanceof XWPFTable) {
                getTableText((XWPFTable) element);
            }
        }
    }
}
/**
 * Prints the content of a single paragraph.
 *
 * <p>For every picture embedded in the paragraph's runs, one line is printed with
 * the picture's width, file name, picture type and checksum. After all runs have
 * been visited, the concatenated run text is printed together with the paragraph
 * alignment, provided the text is not empty. A paragraph without any runs is
 * reported as an empty line break.
 *
 * @param paragraph the paragraph to inspect
 */
private static void getParagraphText(XWPFParagraph paragraph) {
    List<XWPFRun> runs = paragraph.getRuns();
    if (runs.isEmpty()) {
        // No runs at all: the paragraph holds nothing but the paragraph mark.
        System.out.println("按了回车(新段落)");
        return;
    }
    StringBuilder runText = new StringBuilder();
    for (XWPFRun run : runs) {
        runText.append(run.text());
        // A run may carry embedded pictures in addition to (or instead of) text.
        List<XWPFPicture> pictures = run.getEmbeddedPictures();
        if (pictures == null || pictures.isEmpty()) {
            continue;
        }
        for (XWPFPicture picture : pictures) {
            XWPFPictureData pictureData = picture.getPictureData();
            if (pictureData == null) {
                // The picture has no resolvable binary part; there is no metadata
                // to report, so skip it rather than fail with a NullPointerException.
                continue;
            }
            double width = picture.getWidth();
            String fileName = pictureData.getFileName();
            int pictureType = pictureData.getPictureType();
            Long checksum = pictureData.getChecksum();
            System.out.println(width + ", " + fileName + ", " + pictureType + ", " + checksum);
        }
    }
    if (runText.length() > 0) {
        runText.append(",对齐方式:").append(paragraph.getAlignment().name());
        System.out.println(runText);
    }
}
/**
 * Prints the content of a table, cell by cell, row by row.
 *
 * <p>A cell is handled like a small document of its own: its body elements are
 * visited in order, paragraphs are printed via {@code getParagraphText} and
 * tables nested inside the cell are processed recursively.
 *
 * @param table the table to inspect
 */
private static void getTableText(XWPFTable table) {
    for (XWPFTableRow row : table.getRows()) {
        for (XWPFTableCell cell : row.getTableCells()) {
            // cell.getText() would be the simple way to read a cell, but it does
            // not expose per-paragraph details such as alignment, so each body
            // element is walked instead.
            for (IBodyElement element : cell.getBodyElements()) {
                if (element instanceof XWPFParagraph) {
                    getParagraphText((XWPFParagraph) element);
                } else if (element instanceof XWPFTable) {
                    // Nested table inside the cell.
                    getTableText((XWPFTable) element);
                }
            }
        }
    }
}
Apache POI 解析读取 Word 文档的段落、表格、图片
于 2023-03-21 17:58:37 首次发布