POI getRuns() 非连续占位符处理

业务需要处理 Word 文档并替换其中的占位符。使用 POI 时发现:同一个占位符的文本可能被拆分到多个 run 中,getRuns() 得到的单个 run 不一定包含完整的占位符。网上没有找到现成的解决办法,于是自己写了一个。
代码的功能是:将段落、表格中形如 “${AAA|BBB}” 的占位符,根据一个布尔值替换为 “|” 前面的内容(true,即 AAA)或 “|” 后面的内容(false,即 BBB)。
注意!!没有对单独出现的 $, {, } 进行判断和处理!

package com.emp.backend.util;

import org.apache.poi.xwpf.usermodel.*;

import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utility for Word (.docx) documents that contain placeholders of the form
 * {@code ${named|anonymous}}.
 *
 * <p>It can list the placeholders found in a document and rewrite a document so that every
 * placeholder is replaced by either the part before the {@code |} (the "named" value) or the
 * part after it (the "anonymous" value).
 *
 * <p>Word may store the text of one placeholder in several consecutive runs. Before replacing,
 * such a split placeholder is merged into the run that holds its {@code $}; the merged text takes
 * the formatting of that run and the following runs that contributed text are emptied.
 *
 * <p>Limitations: only the first text element of each run ({@code getText(0)}) is inspected, and
 * only top-level paragraphs and tables (and paragraphs directly inside table cells) are processed.
 *
 * @author 寰恒丶
 * @version 1.0
 * @description: Word file processing
 * @date 2023/10/24 19:38
 */
public class MyWordUtil {
    // NOTE: the commas inside the character classes are literal, so ',' is accepted in both parts
    // of a placeholder in addition to CJK characters, digits, ASCII letters and '*'.
    private static final String PLACE_HOLDER_REGEX = "\\$\\{[\\u4e00-\\u9fa5,0-9,A-Z,a-z,*]+\\|[\\u4e00-\\u9fa5,0-9,A-Z,a-z,*]+?\\}";
    // Everything after '{' up to (not including) '|'.
    private static final String NAMED_INFO_REGEX = "(?<=\\{)[^\\|]+";
    // Everything after '|' up to (not including) '}'.
    private static final String ANONYMOUS_INFO_REGEX = "(?<=\\|)[^\\}]+";
    private static final Pattern PLACE_HOLDER_PATTERN = Pattern.compile(PLACE_HOLDER_REGEX);
    private static final Pattern NAMED_PATTERN = Pattern.compile(NAMED_INFO_REGEX);
    private static final Pattern ANONYMOUS_PATTERN = Pattern.compile(ANONYMOUS_INFO_REGEX);
    private static final String PLACE_HOLDER_HEAD = "$";
    private static final String PLACE_HOLDER_TAIL = "}";

    /**
     * Reads {@code srcFile}, replaces every placeholder in its body paragraphs and tables, and
     * writes the result to {@code targetFile}.
     *
     * @param srcFile    path of the .docx file to read
     * @param targetFile path of the .docx file to write
     * @param named      {@code true} to keep the part before {@code |}, {@code false} to keep the
     *                   part after it; must not be {@code null} (unboxing fails with a
     *                   {@link NullPointerException} as soon as a placeholder is found)
     * @throws IOException if the source cannot be read or the target cannot be written
     */
    public static void replacePlaceHolder(String srcFile, String targetFile, Boolean named) throws IOException {
        // try-with-resources: the source stream and the document are released even when
        // processing or writing fails.
        try (InputStream in = Files.newInputStream(Paths.get(srcFile));
             XWPFDocument docx = new XWPFDocument(in)) {
            for (IBodyElement element : docx.getBodyElements()) {
                if (element instanceof XWPFParagraph) {
                    replaceParagraphContent((XWPFParagraph) element, named);
                } else if (element instanceof XWPFTable) {
                    replaceTableContent((XWPFTable) element, named);
                }
            }
            try (OutputStream out = new FileOutputStream(targetFile)) {
                docx.write(out);
            }
        }
    }

    /**
     * Collects every placeholder that occurs in the paragraphs and tables of a document.
     *
     * <p>Matching is done on the full paragraph / cell text, so placeholders that are split
     * across runs are found as well. Duplicates are kept, in order of appearance (paragraphs
     * first, then tables). This method does not explicitly close {@code inputStream}.
     *
     * @param inputStream stream positioned at the start of a .docx file
     * @return the matched placeholders, including the surrounding {@code ${ }}
     * @throws IOException if the document cannot be read
     */
    public static List<String> getPlaceholderList(InputStream inputStream) throws IOException {
        List<String> res = new ArrayList<>();
        try (XWPFDocument doc = new XWPFDocument(inputStream)) {
            // Paragraphs
            for (XWPFParagraph paragraph : doc.getParagraphs()) {
                collectPlaceHolders(paragraph.getParagraphText(), res);
            }
            // Tables
            for (XWPFTable table : doc.getTables()) {
                for (XWPFTableRow row : table.getRows()) {
                    for (XWPFTableCell cell : row.getTableCells()) {
                        collectPlaceHolders(cell.getText(), res);
                    }
                }
            }
        }
        return res;
    }

    /** Appends every placeholder found in {@code text} to {@code sink}. */
    private static void collectPlaceHolders(String text, List<String> sink) {
        if (text == null) {
            return;
        }
        Matcher matcher = PLACE_HOLDER_PATTERN.matcher(text);
        while (matcher.find()) {
            sink.add(matcher.group());
        }
    }

    /**
     * Maps each placeholder to its named part:
     * {@code ${named|anonymous}} -> {@code named}.
     *
     * @param placeHolderList placeholders including the surrounding {@code ${ }}
     * @return map from placeholder to the text between <code>{</code> and {@code |}
     */
    public static Map<String, String> getNamedInfoMap(List<String> placeHolderList) {
        Map<String, String> res = new HashMap<>();
        for (String placeHolder : placeHolderList) {
            res.put(placeHolder, getNamedInfoFromPlaceHolder(placeHolder));
        }
        return res;
    }

    /** Returns the text between <code>{</code> and {@code |}, or the input itself when there is none. */
    private static String getNamedInfoFromPlaceHolder(String placeHolder) {
        Matcher matcher = NAMED_PATTERN.matcher(placeHolder);
        if (matcher.find()) {
            return matcher.group();
        }
        return placeHolder;
    }

    /**
     * Maps each placeholder to its anonymous part:
     * {@code ${named|anonymous}} -> {@code anonymous}.
     */
    private static Map<String, String> getAnonymousInfoMap(List<String> placeHolderList) {
        Map<String, String> res = new HashMap<>();
        for (String placeHolder : placeHolderList) {
            res.put(placeHolder, getAnonymousInfoFromPlaceHolder(placeHolder));
        }
        return res;
    }

    /** Returns the text between {@code |} and <code>}</code>, or the input itself when there is none. */
    private static String getAnonymousInfoFromPlaceHolder(String placeHolder) {
        Matcher matcher = ANONYMOUS_PATTERN.matcher(placeHolder);
        if (matcher.find()) {
            return matcher.group();
        }
        return placeHolder;
    }

    /**
     * Replaces the placeholders in one paragraph, run by run.
     *
     * <p>Runs without a text element are skipped and never written to. A run is only rewritten
     * when its text actually changes.
     */
    private static void replaceParagraphContent(XWPFParagraph paragraph, Boolean named) {
        List<XWPFRun> runs = paragraph.getRuns();
        for (int i = 0; i < runs.size(); i++) {
            XWPFRun curRun = runs.get(i);
            String originalText = curRun.getText(0);
            if (originalText == null) {
                // No text element at index 0: nothing to scan, and calling setText would add one.
                continue;
            }

            // A run may end with the start of a placeholder whose remainder lives in later runs.
            // Keep pulling the remainder in until the text no longer ends with an open placeholder
            // (one merge can expose another, e.g. "${a" + "|b} ${c" + "|d}").
            String curText = originalText;
            String merged = mergeSplitPlaceHolder(runs, i, curText);
            while (merged != null) {
                curText = merged;
                merged = mergeSplitPlaceHolder(runs, i, curText);
            }

            String replaced = substitute(curText, named);
            if (!replaced.equals(originalText)) {
                curRun.setText(replaced, 0);
            }
        }
    }

    /**
     * Tries to complete a placeholder that starts in {@code text} (the text of run {@code index})
     * and continues in the following runs.
     *
     * <p>The merge is only committed when the text from the last {@code $} onwards really forms a
     * placeholder after appending the following runs up to the first one containing
     * <code>}</code>; otherwise a stray {@code $} would swallow unrelated runs. On success the
     * contributing runs are emptied and the merged text is returned.
     *
     * @return the merged text, or {@code null} when there is nothing to merge
     */
    private static String mergeSplitPlaceHolder(List<XWPFRun> runs, int index, String text) {
        int headPos = text.lastIndexOf(PLACE_HOLDER_HEAD);
        if (headPos < 0 || headPos < text.lastIndexOf(PLACE_HOLDER_TAIL)) {
            // No '$' at all, or the last '$' is already followed by a '}' in this run.
            return null;
        }

        StringBuilder merged = new StringBuilder(text);
        int tailIndex = -1;
        for (int j = index + 1; j < runs.size(); j++) {
            String part = runs.get(j).getText(0);
            if (part == null) {
                continue; // run without text contributes nothing
            }
            merged.append(part);
            if (containPlaceHolderTail(part)) {
                tailIndex = j;
                break;
            }
        }
        if (tailIndex < 0) {
            return null; // no closing '}' in the rest of the paragraph
        }

        Matcher matcher = PLACE_HOLDER_PATTERN.matcher(merged);
        matcher.region(headPos, merged.length());
        if (!matcher.lookingAt()) {
            return null; // the '$' does not start a well-formed placeholder
        }

        for (int k = index + 1; k <= tailIndex; k++) {
            XWPFRun consumed = runs.get(k);
            if (consumed.getText(0) != null) {
                consumed.setText("", 0);
            }
        }
        return merged.toString();
    }

    /**
     * Returns {@code text} with every placeholder replaced by its named or anonymous part.
     * Handles any number of placeholders in the same text.
     */
    private static String substitute(String text, Boolean named) {
        Matcher matcher = PLACE_HOLDER_PATTERN.matcher(text);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            String placeHolder = matcher.group();
            String value = named
                    ? getNamedInfoFromPlaceHolder(placeHolder)
                    : getAnonymousInfoFromPlaceHolder(placeHolder);
            // quoteReplacement: the value is literal text, not a replacement pattern.
            matcher.appendReplacement(out, Matcher.quoteReplacement(value));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /** Replaces the placeholders in every paragraph of every cell of {@code table}. */
    private static void replaceTableContent(XWPFTable table, Boolean named) {
        for (XWPFTableRow row : table.getRows()) {
            for (XWPFTableCell cell : row.getTableCells()) {
                for (XWPFParagraph paragraph : cell.getParagraphs()) {
                    replaceParagraphContent(paragraph, named);
                }
            }
        }
    }

    /** Returns whether {@code content} is non-null and contains the closing <code>}</code>. */
    private static boolean containPlaceHolderTail(String content) {
        return content != null && content.contains(PLACE_HOLDER_TAIL);
    }
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
POI中根据占位符获取当前页码,需要先找到对应的占位符所在的段落,然后再结合页面布局信息计算当前页码。以下是一个示例代码: ```java // 查找占位符对应的段落 String placeholder = "[PAGE_NUMBER]"; XWPFDocument document = ...; // 文档对象 XWPFParagraph placeholderParagraph = null; for (XWPFParagraph paragraph : document.getParagraphs()) { String text = paragraph.getText(); if (text != null && text.contains(placeholder)) { placeholderParagraph = paragraph; break; } } // 计算当前页码 if (placeholderParagraph != null) { IBody body = placeholderParagraph.getBody(); int currentPos = body.getPosOfParagraph(placeholderParagraph); CTSectPr sectPr = document.getDocument().getBody().getSectPr(); CTPageNumber pageNumber = sectPr.getPGNumType().getStart(); int startPage = pageNumber.getVal().intValue(); // 起始页码 int totalPages = 0; for (XWPFParagraph paragraph : document.getParagraphs()) { if (paragraph.getBody() == body) { totalPages++; } } int currentPage = startPage + (int)Math.ceil((double)currentPos / totalPages); System.out.println("当前页码:" + currentPage); } else { System.out.println("未找到占位符"); } ``` 这里假设占位符是一个字符串,例如"[PAGE_NUMBER]",可以根据这个字符串来查找对应的段落。然后计算当前页码的方法和上一个回答中的一样,都是根据段落在整个文档中的位置和页面布局信息来计算的。 需要注意的是,如果同一个占位符出现在多个段落中,那么计算出来的页码可能不同,因为每个段落在页面中的位置可能不同。如果要保证同一个占位符计算出来的页码相同,可以考虑在文档中只使用一个占位符,并且只放在文档的开头或结尾等固定位置。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值