/**
* 读取Word 到数据库
* @param file
* @param request
* @throws IOException
* @throws ParseException
*/
@Override
public void importWord(MultipartFile file, HttpServletRequest request) throws IOException, ParseException {
FileInputStream in = (FileInputStream) file.getInputStream();
String regEx="[^0-9\\.]+";//自定义正则表达式:保留整数与小数(字符串取数字方法)
Pattern pattern = Pattern.compile(regEx);
XWPFDocument xwpf = new XWPFDocument(in);
// 获取word中的所有段落与表格
List<IBodyElement> elements = xwpf.getBodyElements();
//用定义好的正则表达式拆分字符串,,把字符串中的数字留出来
String dates[] = pattern.split(DocUtils.getParagraphText((XWPFParagraph));
for (IBodyElement element : elements) {
// 段落
if (element instanceof XWPFParagraph) {
String paragraphText = DocUtils.getParagraphText((XWPFParagraph) element);
} else if (element instanceof XWPFTable) {
// 表格
Map<String, List<String>> tabelText = DocUtils.getTabelText((XWPFTable) element);
}
}
}
依赖方面:XWPF 相关的类位于 Apache POI 的 poi-ooxml 模块中,pom 里引入前两个依赖(poi 和 poi-ooxml)应该就够了,可参考下图:
文档截图:
工具类:
package com.essence.business.sxz.common.util;
import org.apache.poi.xwpf.usermodel.*;
import java.util.*;
/**
 * Static helpers for extracting plain text from .docx (XWPF) paragraphs and tables.
 */
public class DocUtils {

    /**
     * Extracts the text of a .docx table, producing one map entry per row.
     *
     * <p>The text of a row's first cell becomes the key; the texts of the remaining cells
     * become the value list, in cell order. An empty cell is stored as {@code null};
     * non-empty cells have every {@code ","} removed. Rows that contain no cells are
     * skipped, because they have no first cell to use as a key. If two rows share the same
     * first-cell text, the later row replaces the earlier one.
     *
     * @param table the table to read
     * @return row data keyed by first-cell text, in row order
     */
    public static Map<String, List<String>> getTabelText(XWPFTable table) {
        Map<String, List<String>> result = new LinkedHashMap<>();
        for (XWPFTableRow row : table.getRows()) {
            List<XWPFTableCell> cells = row.getTableCells();
            if (cells.isEmpty()) {
                // No first cell means no key; skip instead of inserting a null key.
                continue;
            }
            String key = cellText(cells.get(0));
            List<String> values = new ArrayList<>(cells.size() - 1);
            for (int i = 1; i < cells.size(); i++) {
                String value = cellText(cells.get(i));
                // The emptiness check comes before the comma removal, so a cell holding only
                // commas is stored as "" rather than null.
                values.add(value.isEmpty() ? null : value.replace(",", ""));
            }
            result.put(key, values);
        }
        return result;
    }

    /**
     * Returns the text of a paragraph by concatenating the text of all of its runs.
     *
     * @param paragraph the paragraph to read
     * @return the concatenated run text; an empty string when the paragraph has no runs
     */
    public static String getParagraphText(XWPFParagraph paragraph) {
        StringBuilder runText = new StringBuilder();
        for (XWPFRun run : paragraph.getRuns()) {
            runText.append(run.text());
        }
        return runText.toString();
    }

    /**
     * Concatenates the text of every paragraph in a cell, without any separator.
     * Only the paragraphs returned by {@code getParagraphs()} are read.
     */
    private static String cellText(XWPFTableCell cell) {
        StringBuilder sb = new StringBuilder();
        for (XWPFParagraph paragraph : cell.getParagraphs()) {
            sb.append(getParagraphText(paragraph));
        }
        return sb.toString();
    }
}
没写清楚的地方可以私信我