使用poi对docx文件指定内容标注并高亮

package com.ruoyi;

import org.apache.poi.POIXMLDocumentPart;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.poi.xwpf.usermodel.TextSegement;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlObject;
import org.apache.xmlbeans.XmlOptions;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTEmptyImpl;
import org.w3c.dom.Node;

import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
import javax.xml.namespace.QName;
import java.io.*;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.Locale;

/**
 * @program: ruoyi
 * @create: 2021-10-08 11:33
 * @author: sxl
 * @description:
 **/
public class MDocXTest {
    /**
     * Demo entry point: opens a Word document, highlights every occurrence of a fixed keyword
     * and attaches a comment to each occurrence, then writes the result to a new file.
     *
     * <p>Input and output paths are hard-coded. NOTE(review): the input path ends in
     * {@code .doc}; {@link XWPFDocument} reads the OOXML (.docx) format, so the file is
     * presumably a .docx with a misleading extension - confirm.
     *
     * @param args unused
     * @throws Exception on any I/O or document-format failure
     */
    public static void main(String[] args) throws Exception {
        final String keyword = "聚会";
        // try-with-resources closes the stream and the document even if processing fails.
        try (InputStream is = new FileInputStream(new File("D:\\核工业口述史(编辑稿)0423_妇女出版社.doc"));
             XWPFDocument doc = new XWPFDocument(is)) {
            XWPFCommentsDocument commentsDocument = createCommentsDocument(doc);
            for (XWPFParagraph p : doc.getParagraphs()) {
                if (!p.getText().contains(keyword)) {
                    continue;
                }
                List<TextSegement> segments = searchText(p, keyword, new PositionInParagraph());
                if (segments.isEmpty()) {
                    // The paragraph text contains the keyword but it could not be located run by
                    // run: fall back to commenting the whole paragraph, or skip it if it has no runs.
                    List<XWPFRun> allRuns = p.getRuns();
                    if (allRuns.isEmpty()) {
                        System.out.println("未在段落{}中找到到对应内容{},批注被跳过"+ p.getText());
                    } else {
                        addComment(commentsDocument, p, "sxl", "批注内容",
                                allRuns.get(0), allRuns.get(allRuns.size() - 1));
                    }
                    continue;
                }
                // Segment indices refer to the runs as they were when searchText ran. Every run
                // inserted while handling an earlier match shifts the later ones, so track the
                // real number of inserted runs instead of assuming a fixed count per match.
                int insertedRuns = 0;
                for (TextSegement segment : segments) {
                    List<XWPFRun> runs = p.getRuns();
                    int runCountBefore = runs.size();
                    int beginRunIndex = segment.getBeginRun() + insertedRuns;
                    int endRunIndex = segment.getEndRun() + insertedRuns;
                    XWPFRun begin;
                    XWPFRun end;
                    if (beginRunIndex == endRunIndex) {
                        XWPFRun run = runs.get(beginRunIndex);
                        // getText(0) may be null for a run without text; compare null-safely.
                        String runText = run.getText(0);
                        if (keyword.equals(runText)) {
                            highLight(p, run);
                            begin = end = run;
                        } else {
                            begin = end = containsMatch(p, keyword, run, beginRunIndex);
                        }
                    } else {
                        XWPFRun firstRun = runs.get(beginRunIndex);
                        XWPFRun lastRun = runs.get(endRunIndex);
                        // Part of the keyword located at the end of the first run.
                        String beginText = getBeginString(firstRun.text(), keyword);
                        // Part of the keyword located at the start of the last run.
                        String endText = getEndString(lastRun.text(), keyword);
                        // Runs strictly between the two belong entirely to the match.
                        for (int i = beginRunIndex + 1; i < endRunIndex; i++) {
                            highLight(p, runs.get(i));
                        }
                        begin = beginMatch(p, beginText, firstRun, beginRunIndex);
                        // beginMatch may have inserted a run, which moves the last run one slot
                        // to the right; look its position up again before splitting it.
                        int shiftedEndIndex = p.getRuns().indexOf(lastRun);
                        if (shiftedEndIndex < 0) {
                            shiftedEndIndex = endRunIndex;
                        }
                        end = endMatch(p, endText, lastRun, shiftedEndIndex);
                    }
                    addComment(commentsDocument, p, "sxl", "批注内容", begin, end);
                    insertedRuns += p.getRuns().size() - runCountBefore;
                }
            }
            File file = new File("D:\\test2.docx");
            try (FileOutputStream out = new FileOutputStream(file)) {
                doc.write(out);
            }
        }
    }

    /**
     * Splits a run that contains {@code content} somewhere inside its text into three runs:
     * the text before the first occurrence (kept in {@code run}), a new highlighted run holding
     * {@code content}, and a new run holding everything after that occurrence.
     *
     * <p>The text is cut with {@code indexOf}/{@code substring} rather than {@code String.split},
     * so {@code content} is taken literally (not as a regex), nothing is duplicated when the run
     * ends with {@code content}, and later occurrences are preserved in the trailing run.
     *
     * @param p        paragraph owning the run
     * @param content  literal text to isolate and highlight
     * @param run      run whose text contains {@code content}
     * @param runIndex index of {@code run} within the paragraph's runs
     * @return the newly inserted highlighted run, or {@code run} itself (highlighted as a whole)
     *         when its text does not contain {@code content}
     */
    private static XWPFRun containsMatch(XWPFParagraph p, String content, XWPFRun run, int runIndex) {
        String runText = run.text();
        int matchStart = runText.indexOf(content);
        if (matchStart < 0) {
            // Nothing to split: leave the text untouched rather than inventing content.
            highLight(p, run);
            return run;
        }
        String prefix = runText.substring(0, matchStart);
        String suffix = runText.substring(matchStart + content.length());

        run.setText(prefix, 0);

        XWPFRun newRun = p.insertNewRun(runIndex + 1);
        newRun.setText(content);
        highLight(p, newRun);

        // Always insert the trailing run (even when empty) so a split adds exactly two runs.
        p.insertNewRun(runIndex + 2).setText(suffix);
        return newRun;
    }
    /**
     * Marks the given run with a yellow highlight, reusing the run's existing highlight
     * element when it already has one.
     *
     * @param p   paragraph owning the run
     * @param run run to highlight
     */
    private static void highLight(XWPFParagraph p, XWPFRun run) {
        CTRPr runProperties = getRunCTRPr(p, run);
        CTHighlight marker;
        if (runProperties.isSetHighlight()) {
            marker = runProperties.getHighlight();
        } else {
            marker = runProperties.addNewHighlight();
        }
        marker.setVal(STHighlightColor.YELLOW);
    }
    /**
     * Handles the first run of a match that spans several runs: the tail of the run's text
     * ({@code content}) is moved into a new highlighted run placed right after it.
     *
     * @param p             paragraph owning the run
     * @param content       part of the searched text found at the end of {@code run}
     * @param run           first run of the match
     * @param beginRunIndex index of {@code run} within the paragraph's runs
     * @return {@code run} itself when its whole text equals {@code content}, otherwise the newly
     *         inserted highlighted run
     */
    private static XWPFRun beginMatch(XWPFParagraph p, String content, XWPFRun run, int beginRunIndex) {
        String runText = run.text();
        if (runText.equals(content)) {
            highLight(p, run);
            return run;
        }
        // Strip only the matched tail. A blanket replace() would also delete earlier,
        // unrelated occurrences of the same characters from the run.
        String remaining = runText.endsWith(content)
                ? runText.substring(0, runText.length() - content.length())
                : runText.replace(content, "");
        run.setText(remaining, 0);
        XWPFRun newRun = p.insertNewRun(beginRunIndex + 1);
        newRun.setText(content);
        highLight(p, newRun);
        return newRun;
    }
   /**
     * 设置指定段落高亮
     */
    /*  private void highLightParagraph(XWPFParagraph p, String content) {
         TextSegement segment = p.searchText(content, new PositionInParagraph());

         int beginRunIndex = segment.getBeginRun();
         int endRunIndex = segment.getEndRun();
         List<XWPFRun> runs = p.getRuns();

         if (beginRunIndex == endRunIndex) {
             XWPFRun run = runs.get(beginRunIndex);
             String runText = run.getText(0);
             if (runText.equals(content)) {
                 highLight(p, run);
             } else {
                 containsMatch(p, content, run, beginRunIndex);
             }
         } else {
             begin = runs.get(beginRunIndex);
             end = runs.get(endRunIndex);
             // 获得第一个run标签,匹配内容
             String beginText = getBeginString(begin.text(), content);
             // 获得最后一个run标签,匹配内容
             String endText = getEndString(end.text(), content);
             // 高亮中间的文本
             for (int i = beginRunIndex + 1; i < endRunIndex; i++) {
                 XWPFRun run = runs.get(i);
                 highLight(p, run);
             }
         }
     }*/
    /**
     * Handles the last run of a match that spans several runs: the head of the run's text
     * ({@code content}) is moved into a new highlighted run placed right before it.
     *
     * @param p           paragraph owning the run
     * @param content     part of the searched text found at the start of {@code run}
     * @param run         last run of the match
     * @param endRunIndex current index of {@code run} within the paragraph's runs; the new run
     *                    is inserted at this position
     * @return {@code run} itself when its whole text equals {@code content}, otherwise the newly
     *         inserted highlighted run
     */
    private static XWPFRun endMatch(XWPFParagraph p, String content, XWPFRun run, int endRunIndex) {
        String runText = run.text();
        if (runText.equals(content)) {
            highLight(p, run);
            return run;
        }
        // Strip only the matched head. A blanket replace() would also delete later,
        // unrelated occurrences of the same characters from the run.
        String remaining = runText.startsWith(content)
                ? runText.substring(content.length())
                : runText.replace(content, "");
        run.setText(remaining, 0);
        XWPFRun newRun = p.insertNewRun(endRunIndex);
        newRun.setText(content);
        highLight(p, newRun);
        return newRun;
    }


    /**
     * Adds a comment to the comments part and anchors it in the paragraph between two runs.
     *
     * <p>Each comment receives its own id (1, 2, 3, ... in creation order, assuming the comments
     * part only receives comments through this method); the range-start, range-end and
     * reference markers all carry that id.
     *
     * @param commentsDocument comments part receiving the comment body
     * @param paragraph        paragraph containing {@code begin} and {@code end}
     * @param author           comment author; its first character is used as the initials
     * @param content          comment text
     * @param begin            first run covered by the comment
     * @param end              last run covered by the comment
     */
    private static void addComment(XWPFCommentsDocument commentsDocument, XWPFParagraph paragraph, String author, String content, XWPFRun begin, XWPFRun end) {
        CTComments comments = commentsDocument.getComments();
        CTComment ctComment = comments.addNewComment();
        // A constant id would make every comment in the document share the same id;
        // derive it from the number of comments created so far instead.
        BigInteger commentId = BigInteger.valueOf(comments.sizeOfCommentArray());

        ctComment.setAuthor(author);
        // Guard against an empty author name, for which substring(0, 1) would throw.
        ctComment.setInitials(author.isEmpty() ? "" : author.substring(0, 1));
        ctComment.setDate(new GregorianCalendar(Locale.CHINA));
        ctComment.addNewP().addNewR().addNewT().setStringValue(content);
        ctComment.setId(commentId);

        // The start marker is created at the end of the paragraph and then moved to the
        // front of the first covered run.
        CTMarkupRange rangeStart = paragraph.getCTP().addNewCommentRangeStart();
        rangeStart.setId(commentId);
        Node beginNode = begin.getCTR().getDomNode();
        beginNode.insertBefore(rangeStart.getDomNode(), beginNode.getFirstChild());

        CTMarkup reference = paragraph.getCTP().addNewR().addNewCommentReference();
        reference.setId(commentId);
        CTMarkupRange rangeEnd = paragraph.getCTP().addNewCommentRangeEnd();
        rangeEnd.setId(commentId);
        // Node following the last covered run; when there is none, the end marker and the
        // reference simply stay where they were created, at the end of the paragraph.
        Node next = end.getCTR().getDomNode().getNextSibling();
        if (next != null) {
            next.insertBefore(reference.getDomNode(), next.getFirstChild());
            next.insertBefore(rangeEnd.getDomNode(), next.getFirstChild());
        }
    }

    /**
     * Creates the {@code /word/comments.xml} part in the document's package and links it to
     * the main document part with a comments relationship.
     *
     * @param document document that should receive a comments part
     * @return wrapper around the newly created comments part
     * @throws InvalidFormatException if the part name or the relationship is rejected by the package
     */
    private static XWPFCommentsDocument createCommentsDocument(XWPFDocument document) throws InvalidFormatException {
        OPCPackage opcPackage = document.getPackage();
        PackagePartName partName = PackagingURIHelper.createPartName("/word/comments.xml");
        PackagePart part = opcPackage.createPart(partName, "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml");
        XWPFCommentsDocument commentsDocument = new XWPFCommentsDocument(part);

        // Start from the original guess, then skip ids already used by any relationship of the
        // main document part (e.g. hyperlinks, which are not counted by getRelationParts()).
        PackagePart documentPart = document.getPackagePart();
        int candidate = document.getRelationParts().size() + 1;
        while (documentPart.getRelationship("rId" + candidate) != null) {
            candidate++;
        }
        String rId = "rId" + candidate;

        document.addRelation(rId, XWPFRelation.COMMENT, commentsDocument);
        return commentsDocument;
    }

    /**
     * Minimal document part holding the comments of a Word document. The comment list lives
     * in memory and is serialized into the package part when the document is committed.
     */
    private static class XWPFCommentsDocument extends POIXMLDocumentPart {

        /** Root {@code comments} element that collects every comment added to the document. */
        private final CTComments comments;

        private XWPFCommentsDocument(PackagePart part) {
            super(part);
            comments = CommentsDocument.Factory.newInstance().addNewComments();
        }

        private CTComments getComments() {
            return comments;
        }

        /**
         * Writes the comments XML into this part's output stream.
         *
         * @throws IOException if the part cannot be written
         */
        @Override
        protected void commit() throws IOException {
            XmlOptions xmlOptions = new XmlOptions(DEFAULT_XML_OPTIONS);
            // The in-memory object is only the inner type; wrap it in a <comments> root on save.
            xmlOptions.setSaveSyntheticDocumentElement(new QName(CTComments.type.getName().getNamespaceURI(), "comments"));
            PackagePart part = getPackagePart();
            // try-with-resources releases the stream even when save() throws.
            try (OutputStream out = part.getOutputStream()) {
                comments.save(out, xmlOptions);
            }
        }
    }
    /**
     * Returns the run-properties element of a run, creating it when missing.
     *
     * <p>When the run has no underlying CTR, a new run with new properties is appended to the
     * paragraph and those properties are returned instead.
     *
     * @param p    paragraph used as a fallback container
     * @param pRun run whose properties are wanted
     * @return the existing or newly created run properties
     */
    public static CTRPr getRunCTRPr(XWPFParagraph p, XWPFRun pRun) {
        if (pRun.getCTR() == null) {
            return p.getCTP().addNewR().addNewRPr();
        }
        CTRPr existing = pRun.getCTR().getRPr();
        if (existing != null) {
            return existing;
        }
        return pRun.getCTR().addNewRPr();
    }
    /**
     * Finds every occurrence of {@code searched} in a paragraph, scanning the text elements of
     * its runs in order; an occurrence may span several runs.
     *
     * <p>Written because POI's own searchText does not skip {@code CTEmptyImpl} children, which
     * makes it miss text. Unlike POI's version, this one collects all matches instead of
     * stopping at the first one.
     *
     * <p>Side effect: proofing-error markers encountered inside runs are removed from the XML.
     *
     * <p>NOTE(review): on a mismatch the current character is not retried as the start of a new
     * candidate, so an occurrence directly preceded by a partial match (first character typed
     * twice, for instance) is still missed.
     *
     * @param paragraph paragraph to scan
     * @param searched  literal text to look for; must not be empty
     * @param startPos  run/text/char position at which scanning starts
     * @return the matches in document order; empty when nothing was found
     */
    public static List<TextSegement> searchText(XWPFParagraph paragraph, String searched, PositionInParagraph startPos) {
        int startRun = startPos.getRun(),
        startText = startPos.getText(),
        startChar = startPos.getChar();
        int beginRunPos = 0, candCharPos = 0;
        boolean newList = false;
        List<TextSegement> segList = new ArrayList<>();
        CTR[] rArray = paragraph.getCTP().getRArray();
        for (int runPos = startRun; runPos < rArray.length; runPos++) {
            int beginTextPos = 0, beginCharPos = 0, textPos = 0, charPos = 0;
            CTR ctRun = rArray[runPos];
            XmlCursor c = ctRun.newCursor();
            // Visit every direct child of the run.
            c.selectPath("./*");
            try {
                while (c.toNextSelection()) {
                    XmlObject o = c.getObject();
                    if (o instanceof CTText) {
                        if (textPos >= startText) {
                            String candidate = ((CTText) o).getStringValue();
                            if (runPos == startRun) {
                                charPos = startChar;
                            } else {
                                charPos = 0;
                            }

                            for (; charPos < candidate.length(); charPos++) {
                                // Possible start of a match: remember where it begins.
                                if ((candidate.charAt(charPos) == searched.charAt(0)) && (candCharPos == 0)) {
                                    beginTextPos = textPos;
                                    beginCharPos = charPos;
                                    beginRunPos = runPos;
                                    newList = true;
                                }
                                if (candidate.charAt(charPos) == searched.charAt(candCharPos)) {
                                    if (candCharPos + 1 < searched.length()) {
                                        candCharPos++;
                                    } else if (newList) {
                                        TextSegement segment = new TextSegement();
                                        segment.setBeginRun(beginRunPos);
                                        segment.setBeginText(beginTextPos);
                                        segment.setBeginChar(beginCharPos);
                                        segment.setEndRun(runPos);
                                        segment.setEndText(textPos);
                                        segment.setEndChar(charPos);
                                        segList.add(segment);
                                        // Start over after a complete match. Without this reset,
                                        // an occurrence immediately following this one is missed,
                                        // and a repeated last character produces a bogus segment
                                        // with a stale start position.
                                        candCharPos = 0;
                                        newList = false;
                                    }
                                } else {
                                    candCharPos = 0;
                                }
                            }
                        }
                        textPos++;
                    } else if (o instanceof CTProofErr) {
                        c.removeXml();
                    } else if (o instanceof CTRPr || o instanceof CTEmptyImpl) {
                        // Run properties and empty elements do not interrupt a match.
                    } else {
                        // Any other run content breaks a match in progress.
                        candCharPos = 0;
                    }
                }
            } finally {
                c.dispose();
            }
        }
        return  segList;
    }

    /**
     * Returns the longest suffix of {@code begin} that is also a prefix of {@code content},
     * i.e. the part of the searched text that sits at the end of the first run of a match.
     *
     * @param begin   text of the run in which the match starts
     * @param content full text being searched for
     * @return the overlapping text, or an empty string when {@code begin} does not end with
     *         any prefix of {@code content}
     */
    public static String getBeginString(String begin, String content) {
        String candidate = begin;
        // Drop leading characters until the remainder is a prefix of the searched text.
        // The loop must not be bounded by the shrinking string's own length, or it stops
        // about halfway and returns a suffix that does not match at all.
        while (!candidate.isEmpty() && !content.startsWith(candidate)) {
            candidate = candidate.substring(1);
        }
        return candidate;
    }

    /**
     * Returns the longest prefix of {@code end} that is also a suffix of {@code content},
     * i.e. the part of the searched text that sits at the start of the last run of a match.
     *
     * @param end     text of the run in which the match ends
     * @param content full text being searched for
     * @return the overlapping text, or an empty string when {@code end} does not start with
     *         any suffix of {@code content}
     */
    public static String getEndString(String end, String content) {
        String candidate = end;
        // Drop trailing characters until the remainder is a suffix of the searched text;
        // an empty result means there is no overlap, instead of an arbitrary first character.
        while (!candidate.isEmpty() && !content.endsWith(candidate)) {
            candidate = candidate.substring(0, candidate.length() - 1);
        }
        return candidate;
    }

}

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值