业务需要处理 Word 文档并替换其中的占位符。使用 Apache POI 时发现:同一个占位符可能被拆分到 getRuns() 返回的多个 XWPFRun 中(文本不连续),网上没有找到现成的解决办法,于是自己实现了一个。
代码的功能:根据一个布尔参数,将段落和表格中形如 “${AAA|BBB}” 的占位符替换为 “|” 之前的内容(AAA,参数为 true 时)或 “|” 之后的内容(BBB,参数为 false 时)。
注意!!没有对单独出现的 $, {, } 进行判断和处理!
package com.emp.backend.util;
import org.apache.poi.xwpf.usermodel.*;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utility for Word (.docx) documents that contain placeholders of the form
 * {@code ${named|anonymous}}.
 *
 * <p>A placeholder can be listed ({@link #getPlaceholderList(InputStream)}) or
 * replaced by either the part before the {@code |} (the "named" part) or the part
 * after it (the "anonymous" part) via
 * {@link #replacePlaceHolder(String, String, Boolean)}.
 *
 * <p>Word may store one placeholder across several consecutive runs; the
 * replacement code re-joins such runs before substituting. Only the first text
 * element of each run ({@code getText(0)}) is inspected.
 *
 * @author 寰恒丶
 * @version 1.0
 * @date 2023/10/24 19:38
 */
public class MyWordUtil {

    /**
     * Full placeholder: {@code $}, {@code {}, one or more allowed characters, {@code |},
     * one or more allowed characters, <code>}</code>. The commas inside the character
     * classes are literal, so {@code ,} is an allowed character as well as CJK
     * ideographs, digits, ASCII letters and {@code *}.
     */
    private static final String PLACEHOLDER_REGEX = "\\$\\{[\\u4e00-\\u9fa5,0-9,A-Z,a-z,*]+\\|[\\u4e00-\\u9fa5,0-9,A-Z,a-z,*]+?\\}";

    /** Text following a <code>{</code> up to (not including) the next {@code |}. */
    private static final String NAMED_INFO_REGEX = "(?<=\\{)[^\\|]+";

    /** Text following a {@code |} up to (not including) the next <code>}</code>. */
    private static final String ANONYMOUS_INFO_REGEX = "(?<=\\|)[^\\}]+";

    private static final Pattern PLACEHOLDER_PATTERN = Pattern.compile(PLACEHOLDER_REGEX);
    private static final Pattern NAMED_PATTERN = Pattern.compile(NAMED_INFO_REGEX);
    private static final Pattern ANONYMOUS_PATTERN = Pattern.compile(ANONYMOUS_INFO_REGEX);

    /** First character of every placeholder. */
    private static final String PLACEHOLDER_HEAD = "$";

    /** Last character of every placeholder. */
    private static final String PLACEHOLDER_TAIL = "}";

    /**
     * Reads {@code srcFile}, replaces every placeholder found in the paragraphs and
     * tables returned by {@code getBodyElements()}, and writes the result to
     * {@code targetFile}.
     *
     * @param srcFile    path of the .docx file to read
     * @param targetFile path of the .docx file to write (created or overwritten)
     * @param named      {@code true} to keep the part before {@code |}; {@code false}
     *                   or {@code null} to keep the part after it
     * @throws IOException if the source cannot be read or the target cannot be written
     */
    public static void replacePlaceHolder(String srcFile, String targetFile, Boolean named) throws IOException {
        // Normalise once so a null Boolean cannot cause an unboxing NPE deep in the loop.
        boolean useNamed = Boolean.TRUE.equals(named);

        // try-with-resources releases the stream and the document even when processing fails.
        try (InputStream in = Files.newInputStream(Paths.get(srcFile));
             XWPFDocument docx = new XWPFDocument(in)) {

            for (IBodyElement element : docx.getBodyElements()) {
                if (element instanceof XWPFParagraph) {
                    replaceParagraphContent((XWPFParagraph) element, useNamed);
                } else if (element instanceof XWPFTable) {
                    replaceTableContent((XWPFTable) element, useNamed);
                }
            }

            try (OutputStream out = Files.newOutputStream(Paths.get(targetFile))) {
                docx.write(out);
            }
        }
    }

    /**
     * Collects every placeholder that occurs in the document's paragraphs and table cells.
     *
     * <p>Matching is done on the joined text of each paragraph / cell, so placeholders
     * that are split across runs are still found. Duplicates are kept, in encounter
     * order (all paragraphs first, then all tables).
     *
     * @param inputStream stream positioned at the start of a .docx file; it is consumed
     *                    by this call. NOTE(review): closing the document may also close
     *                    this stream, depending on the POI version - confirm if callers
     *                    reuse it.
     * @return the matched placeholder strings, possibly empty, never {@code null}
     * @throws IOException if the document cannot be read
     */
    public static List<String> getPlaceholderList(InputStream inputStream) throws IOException {
        List<String> res = new ArrayList<>();
        try (XWPFDocument doc = new XWPFDocument(inputStream)) {
            for (XWPFParagraph paragraph : doc.getParagraphs()) {
                collectPlaceholders(paragraph.getParagraphText(), res);
            }
            for (XWPFTable table : doc.getTables()) {
                for (XWPFTableRow row : table.getRows()) {
                    for (XWPFTableCell cell : row.getTableCells()) {
                        collectPlaceholders(cell.getText(), res);
                    }
                }
            }
        }
        return res;
    }

    /** Appends every placeholder found in {@code text} to {@code sink}; a null text is ignored. */
    private static void collectPlaceholders(String text, List<String> sink) {
        if (text == null) {
            return;
        }
        Matcher matcher = PLACEHOLDER_PATTERN.matcher(text);
        while (matcher.find()) {
            sink.add(matcher.group());
        }
    }

    /**
     * Maps each placeholder to its named part:
     * {@code ${named|anonymous}} -> {@code named}.
     *
     * @param placeHolderList placeholder strings, e.g. from {@link #getPlaceholderList(InputStream)}
     * @return map from the placeholder to the text between <code>{</code> and {@code |};
     *         an entry maps to itself when no such text is found
     */
    public static Map<String, String> getNamedInfoMap(List<String> placeHolderList) {
        Map<String, String> res = new HashMap<>();
        for (String placeHolder : placeHolderList) {
            res.put(placeHolder, getNamedInfoFromPlaceHolder(placeHolder));
        }
        return res;
    }

    /** Returns the text between <code>{</code> and {@code |}, or the input itself if absent. */
    private static String getNamedInfoFromPlaceHolder(String placeHolder) {
        return firstMatchOrSelf(NAMED_PATTERN, placeHolder);
    }

    /**
     * Maps each placeholder to its anonymous part:
     * {@code ${named|anonymous}} -> {@code anonymous}.
     *
     * @param placeHolderList placeholder strings
     * @return map from the placeholder to the text between {@code |} and <code>}</code>;
     *         an entry maps to itself when no such text is found
     */
    private static Map<String, String> getAnonymousInfoMap(List<String> placeHolderList) {
        Map<String, String> res = new HashMap<>();
        for (String placeHolder : placeHolderList) {
            res.put(placeHolder, getAnonymousInfoFromPlaceHolder(placeHolder));
        }
        return res;
    }

    /** Returns the text between {@code |} and <code>}</code>, or the input itself if absent. */
    private static String getAnonymousInfoFromPlaceHolder(String placeHolder) {
        return firstMatchOrSelf(ANONYMOUS_PATTERN, placeHolder);
    }

    /** Returns the first match of {@code pattern} in {@code input}, or {@code input} when there is none. */
    private static String firstMatchOrSelf(Pattern pattern, String input) {
        Matcher matcher = pattern.matcher(input);
        return matcher.find() ? matcher.group() : input;
    }

    /**
     * Replaces the placeholders in one paragraph, run by run.
     *
     * <p>When a run ends inside a placeholder (it has a {@code $} after its last
     * <code>}</code>), the following runs up to the first one containing <code>}</code>
     * are joined onto it. The join is only committed if a valid placeholder really
     * starts at that {@code $}; otherwise the runs are left untouched so that a stray
     * {@code $} does not collapse unrelated runs and their formatting. Joined-in runs
     * are blanked, and the complete text lives in the first run.
     *
     * @param paragraph paragraph whose runs are rewritten in place
     * @param named     {@code true} to keep the named part, {@code false} the anonymous part
     */
    private static void replaceParagraphContent(XWPFParagraph paragraph, boolean named) {
        List<XWPFRun> runs = paragraph.getRuns();
        for (int i = 0; i < runs.size(); i++) {
            XWPFRun curRun = runs.get(i);
            String original = curRun.getText(0);
            if (original == null) {
                // Run carries no text element; nothing to inspect or rewrite.
                continue;
            }

            String merged = original;
            int lastMerged = i;
            // Repeat: the run that closes one placeholder may itself open the next one.
            while (true) {
                int headIndex = danglingHeadIndex(merged);
                if (headIndex < 0) {
                    break;
                }

                StringBuilder candidate = new StringBuilder(merged);
                int tailRun = -1;
                for (int j = lastMerged + 1; j < runs.size(); j++) {
                    String next = runs.get(j).getText(0);
                    if (next == null) {
                        // A text-less run interrupts the placeholder; do not merge across it.
                        break;
                    }
                    candidate.append(next);
                    if (next.contains(PLACEHOLDER_TAIL)) {
                        tailRun = j;
                        break;
                    }
                }
                if (tailRun < 0) {
                    break;
                }

                // Commit only if a well-formed placeholder starts exactly at the dangling '$'.
                Matcher check = PLACEHOLDER_PATTERN.matcher(candidate);
                check.region(headIndex, candidate.length());
                if (!check.lookingAt()) {
                    break;
                }
                merged = candidate.toString();
                lastMerged = tailRun;
            }

            String replaced = substitutePlaceholders(merged, named);
            if (lastMerged > i || !replaced.equals(original)) {
                curRun.setText(replaced, 0);
            }
            // Their text now lives in curRun; blank them so it is not duplicated.
            for (int k = i + 1; k <= lastMerged; k++) {
                runs.get(k).setText("", 0);
            }
        }
    }

    /**
     * Returns the index of the last {@code $} in {@code text} if no <code>}</code>
     * follows it, i.e. a placeholder may have been opened but not closed; otherwise -1.
     */
    private static int danglingHeadIndex(String text) {
        int head = text.lastIndexOf(PLACEHOLDER_HEAD);
        return head > text.lastIndexOf(PLACEHOLDER_TAIL) ? head : -1;
    }

    /**
     * Returns {@code text} with every placeholder replaced by its named or anonymous
     * part. Handles any number of placeholders in one string; returns the same
     * instance when there is none.
     */
    private static String substitutePlaceholders(String text, boolean named) {
        Matcher matcher = PLACEHOLDER_PATTERN.matcher(text);
        if (!matcher.find()) {
            return text;
        }
        StringBuilder out = new StringBuilder(text.length());
        int copiedUpTo = 0;
        do {
            String placeHolder = matcher.group();
            out.append(text, copiedUpTo, matcher.start());
            out.append(named
                    ? getNamedInfoFromPlaceHolder(placeHolder)
                    : getAnonymousInfoFromPlaceHolder(placeHolder));
            copiedUpTo = matcher.end();
        } while (matcher.find());
        out.append(text, copiedUpTo, text.length());
        return out.toString();
    }

    /**
     * Replaces the placeholders in every paragraph of every cell of {@code table}.
     *
     * @param table table whose cell paragraphs are rewritten in place
     * @param named {@code true} to keep the named part, {@code false} the anonymous part
     */
    private static void replaceTableContent(XWPFTable table, boolean named) {
        for (XWPFTableRow row : table.getRows()) {
            for (XWPFTableCell cell : row.getTableCells()) {
                for (XWPFParagraph paragraph : cell.getParagraphs()) {
                    replaceParagraphContent(paragraph, named);
                }
            }
        }
    }
}