通过操作书签可以实现 Word 模板变量替换的功能场景。本文以下代码内容可以直接复制使用,并能正常编译运行。
添加 maven 依赖
<dependencies>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>5.2.2</version>
</dependency>
<dependency>
<groupId>com.deepoove</groupId>
<artifactId>poi-tl</artifactId>
<version>1.12.0</version>
</dependency>
</dependencies>
操作书签的代码
1、ShanhyXWPFDocumentMerge.java
package org.example;
import com.deepoove.poi.xwpf.NiceXWPFDocument;
import com.deepoove.poi.xwpf.XmlXWPFDocumentMerge;
import org.apache.xmlbeans.XmlOptions;
import org.apache.xmlbeans.impl.store.DomImpl;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.w3c.dom.Node;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
 * Document merger that inserts the content of other documents directly before a given
 * paragraph node of the host document.
 *
 * <p>The XML helper methods it needs are private members of {@link XmlXWPFDocumentMerge},
 * so they are invoked through reflection.
 *
 * @author shanhy
 */
public class ShanhyXWPFDocumentMerge extends XmlXWPFDocumentMerge {

    /**
     * Merges the documents supplied by {@code mergeIterator} into {@code source}, placing the
     * merged content immediately before {@code targetParagraphNode}.
     *
     * @param source                the host document that receives the content
     * @param mergeIterator         the documents to merge in
     * @param targetParagraphNode   DOM node of the target paragraph; must be an xmlbeans DOM node
     *                              that is attached to a parent
     * @param deleteTargetParagraph whether the target paragraph itself is removed afterwards
     * @return {@code source}, for chaining
     * @throws IllegalArgumentException if {@code targetParagraphNode} has no parent node
     * @throws Exception                if the merged XML cannot be parsed
     */
    public ShanhyXWPFDocument mergeToParagraphBefore(ShanhyXWPFDocument source, Iterator<ShanhyXWPFDocument> mergeIterator, Node targetParagraphNode, boolean deleteTargetParagraph) throws Exception {
        // Validate before doing any merge work so a detached target cannot leave the host
        // document half-processed.
        Node targetParent = targetParagraphNode.getParentNode();
        if (targetParent == null) {
            throw new IllegalArgumentException("targetParagraphNode is not attached to a parent node");
        }

        CTBody body = source.getDocument().getBody();
        List<String> addParts = createMergeableStrings(source, mergeIterator);
        String[] startEnd = truncatedStartEndXmlFragment(body);

        // Parse in the same xmlbeans locale as the target node; the parsed DOM node is inserted
        // into the target's tree below.
        XmlOptions options = new XmlOptions();
        options.setUseSameLocale(((DomImpl.Dom) targetParagraphNode).locale());
        CTP mergedBody = CTP.Factory.parse(startEnd[0] + String.join("", addParts) + startEnd[1], options);

        // Insert the imported content as a whole in front of the target paragraph.
        targetParent.insertBefore(mergedBody.getDomNode(), targetParagraphNode);
        if (deleteTargetParagraph) {
            // Drop the target paragraph itself.
            targetParent.removeChild(targetParagraphNode);
        }
        return source;
    }

    /**
     * Invokes a private method declared on {@link XmlXWPFDocumentMerge} via reflection.
     *
     * <p>The method is looked up on {@code XmlXWPFDocumentMerge.class} explicitly rather than on
     * {@code getClass().getSuperclass()}, so the lookup keeps working when this class is
     * subclassed.
     *
     * @param methodName   name of the method to call
     * @param paramClasses declared parameter types of the method
     * @param params       arguments to pass
     * @return the value returned by the invoked method
     * @throws NoSuchMethodException     if no such method is declared on the parent class
     * @throws InvocationTargetException if the invoked method throws
     * @throws IllegalAccessException    if the method cannot be accessed
     */
    private Object invokeSuperMethod(String methodName, Class<?>[] paramClasses, Object[] params) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
        Method method = XmlXWPFDocumentMerge.class.getDeclaredMethod(methodName, paramClasses);
        method.setAccessible(true);
        return method.invoke(this, params);
    }

    /**
     * Calls the parent's private {@code createMergeableStrings} method.
     *
     * @return the list returned by the parent, or an empty list if it did not return a list
     * @throws RuntimeException if the reflective call fails
     */
    @SuppressWarnings("unchecked")
    private List<String> createMergeableStrings(ShanhyXWPFDocument source, Iterator<ShanhyXWPFDocument> mergeIterator) {
        try {
            Object obj = invokeSuperMethod("createMergeableStrings", new Class[]{NiceXWPFDocument.class, Iterator.class}, new Object[]{source, mergeIterator});
            if (obj instanceof List<?>) {
                return (List<String>) obj;
            }
        } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) {
            throw new RuntimeException("Failed to invoke XmlXWPFDocumentMerge.createMergeableStrings", e);
        }
        return new ArrayList<>();
    }

    /**
     * Calls the parent's private {@code truncatedStartEndXmlFragment} method.
     *
     * @return the array returned by the parent; element 0 is used as the prefix and element 1 as
     *         the suffix of the merged XML
     * @throws RuntimeException if the reflective call fails
     */
    private String[] truncatedStartEndXmlFragment(CTBody body) {
        try {
            Object obj = invokeSuperMethod("truncatedStartEndXmlFragment", new Class[]{CTBody.class}, new Object[]{body});
            return (String[]) obj;
        } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) {
            throw new RuntimeException("Failed to invoke XmlXWPFDocumentMerge.truncatedStartEndXmlFragment", e);
        }
    }
}
2、ShanhyXWPFDocument.java
package org.example;
import com.deepoove.poi.xwpf.NiceXWPFDocument;
import org.w3c.dom.Node;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
/**
 * {@link NiceXWPFDocument} subclass that can merge other documents in front of a chosen
 * paragraph of this document.
 *
 * @author shanhy
 */
public class ShanhyXWPFDocument extends NiceXWPFDocument {

    /**
     * Creates the document from a stream by delegating to the parent constructor.
     *
     * @param in stream to read the docx content from
     * @throws IOException if the parent constructor fails to read the stream
     */
    public ShanhyXWPFDocument(InputStream in) throws IOException {
        super(in);
    }

    /**
     * Merges the given documents into this document, immediately before the target paragraph.
     * The work is delegated to a new {@link ShanhyXWPFDocumentMerge}.
     *
     * @param source                not read by this method; the merge always targets {@code this}
     * @param mergeIterator         the documents to merge in
     * @param targetParagraphNode   DOM node of the target paragraph
     * @param deleteTargetParagraph whether the target paragraph itself is removed afterwards
     * @throws Exception if the merge fails
     */
    public void mergeToParagraphBefore(ShanhyXWPFDocument source, Iterator<ShanhyXWPFDocument> mergeIterator, Node targetParagraphNode, boolean deleteTargetParagraph) throws Exception {
        final ShanhyXWPFDocumentMerge merger = new ShanhyXWPFDocumentMerge();
        merger.mergeToParagraphBefore(this, mergeIterator, targetParagraphNode, deleteTargetParagraph);
    }
}
3、ShanhyDocxBookmarkService.java
package org.example;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlOptions;
import org.apache.xmlbeans.impl.store.DomImpl;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
 * Replaces the content of bookmarks in a docx document, either with plain text or with the
 * content of another docx document.
 *
 * @author shanhy
 */
public class ShanhyDocxBookmarkService {

    /*
     * docx is XML based; the constants below are the element and attribute names of the
     * docx XML that this class works with.
     */
    public static final String RUN_NODE_NAME = "w:r";
    public static final String TEXT_NODE_NAME = "w:t";
    public static final String BOOKMARK_START_TAG = "w:bookmarkStart";
    public static final String BOOKMARK_END_TAG = "w:bookmarkEnd";
    public static final String BOOKMARK_ID_ATTR_ID = "w:id";
    public static final String NODENAME_BODY = "w:body";
    public static final String NODENAME_PARAGRAPH = "w:p";
    public static final String BOOKMARK_ID_ATTR_NAME = "w:name";
    public static final String STYLE_NODE_NAME = "w:rPr";
    public static final String PARAGRAPH_PROPERTIES_NAME = "w:pPr";

    /**
     * Collects all {@code w:bookmarkStart} nodes found in the body of the document.
     *
     * @param docx the document to scan
     * @return the bookmark start nodes in document order, e.g.
     *         {@code <w:bookmarkStart w:id="1" w:name="name"/>}
     */
    public List<Node> getBookmarksFromDocx(XWPFDocument docx) {
        Node bodyNode = docx.getDocument().getBody().getDomNode();
        List<Node> bookmarkNodeList = new ArrayList<>();
        getBookmarksFromNode(bodyNode, bookmarkNodeList);
        return bookmarkNodeList;
    }

    /**
     * Recursively walks {@code node} and appends every {@code w:bookmarkStart} node to the list.
     * The walk does not descend into {@code w:bookmarkStart}, {@code w:bookmarkEnd} or
     * {@code w:pPr} nodes.
     *
     * @param node             the node to inspect
     * @param bookmarkNodeList receives the bookmark start nodes that are found
     */
    public void getBookmarksFromNode(Node node, List<Node> bookmarkNodeList) {
        String nodeName = node.getNodeName();
        if (BOOKMARK_START_TAG.equals(nodeName)) {
            bookmarkNodeList.add(node);
            return;
        }
        if (BOOKMARK_END_TAG.equals(nodeName) || PARAGRAPH_PROPERTIES_NAME.equals(nodeName)) {
            return;
        }
        NodeList childNodes = node.getChildNodes();
        for (int i = 0, count = childNodes.getLength(); i < count; i++) {
            getBookmarksFromNode(childNodes.item(i), bookmarkNodeList);
        }
    }

    /**
     * Replaces every bookmark whose name is a key of {@code dataMap}, writes the document to
     * {@code outputStream} and closes the document. The document is closed even when a
     * replacement or the write fails; {@code outputStream} is left open for the caller.
     *
     * @param docx         the template document; closed by this method
     * @param outputStream receives the resulting document
     * @param dataMap      bookmark name to replacement; a value must be a {@link String} or a
     *                     {@link ShanhyXWPFDocument}
     * @throws RuntimeException if a mapped value has an unsupported type
     * @throws Exception        if a replacement or writing the document fails
     */
    public void replaceDocxBookmarks(ShanhyXWPFDocument docx, OutputStream outputStream, Map<String, Object> dataMap) throws Exception {
        try (ShanhyXWPFDocument document = docx) {
            for (Node startBookmarkNode : getBookmarksFromDocx(document)) {
                Node nameAttr = startBookmarkNode.getAttributes() == null
                        ? null
                        : startBookmarkNode.getAttributes().getNamedItem(BOOKMARK_ID_ATTR_NAME);
                if (nameAttr == null || !dataMap.containsKey(nameAttr.getNodeValue())) {
                    continue;
                }
                Object data = dataMap.get(nameAttr.getNodeValue());
                if (data instanceof String) {
                    // Replacement is plain text.
                    replaceDocxBookmarkFromString(getFirstParentParagraphByNode(startBookmarkNode, document), startBookmarkNode, (String) data);
                } else if (data instanceof ShanhyXWPFDocument) {
                    // Replacement is an external docx document.
                    replaceDocxBookmarkFromDocx(document, getFirstParentNodeByNode(startBookmarkNode, document), startBookmarkNode, (ShanhyXWPFDocument) data);
                } else {
                    throw new RuntimeException("替换书签的内容源数据格式暂不支持");
                }
            }
            document.write(outputStream);
        }
    }

    /**
     * Replaces the content of a bookmark with plain text.
     *
     * <p>The first {@code w:r} sibling after the bookmark start that contains a {@code w:t}
     * element is reused, so the replacement sits in the existing run. Every other sibling up to
     * the matching {@code w:bookmarkEnd} (or the end of the parent) is removed. If no run could
     * be reused, a new run holding the text is inserted right after the bookmark start.
     *
     * @param bookmarkParentParagraph paragraph used to create a new run when none can be reused
     * @param startBookmarkNode       the {@code w:bookmarkStart} node
     * @param content                 the replacement text
     */
    public void replaceDocxBookmarkFromString(XWPFParagraph bookmarkParentParagraph, Node startBookmarkNode, String content) {
        Node parentNode = startBookmarkNode.getParentNode();
        boolean contentReplaced = false;
        Node current = startBookmarkNode.getNextSibling();
        while (current != null && !BOOKMARK_END_TAG.equals(current.getNodeName())) {
            // Capture the successor first: once a node is removed its sibling link is gone,
            // which would otherwise end the cleanup after the first removal.
            Node following = current.getNextSibling();
            if (!contentReplaced && RUN_NODE_NAME.equals(current.getNodeName()) && writeRunText(current, content)) {
                contentReplaced = true;
            } else {
                parentNode.removeChild(current);
            }
            current = following;
        }

        if (!contentReplaced) {
            // No reusable run: create one and move it directly behind the bookmark start.
            XWPFRun run = bookmarkParentParagraph.createRun();
            run.setText(content);
            Node newChildNode = run.getCTR().getDomNode();
            Node startBookmarkNextNode = startBookmarkNode.getNextSibling();
            if (startBookmarkNextNode == null) {
                parentNode.appendChild(newChildNode);
            } else {
                parentNode.insertBefore(newChildNode, startBookmarkNextNode);
            }
        }
    }

    /**
     * Writes {@code content} into the first {@code w:t} child of a run and removes any further
     * {@code w:t} children so the text is not duplicated.
     *
     * @return {@code true} if the run had a {@code w:t} child and the text was written
     */
    private boolean writeRunText(Node runNode, String content) {
        // Snapshot the text elements first; children are removed below while the NodeList
        // returned by getChildNodes() reflects the current tree.
        List<Node> textNodes = new ArrayList<>();
        NodeList runChildNodes = runNode.getChildNodes();
        for (int i = 0, count = runChildNodes.getLength(); i < count; i++) {
            Node child = runChildNodes.item(i);
            if (TEXT_NODE_NAME.equals(child.getNodeName())) {
                textNodes.add(child);
            }
        }
        if (textNodes.isEmpty()) {
            return false;
        }
        Node textElement = textNodes.get(0);
        Node textValue = textElement.getFirstChild();
        if (textValue != null) {
            textValue.setNodeValue(content);
        } else {
            // An empty <w:t/> has no text child yet.
            textElement.appendChild(textElement.getOwnerDocument().createTextNode(content));
        }
        for (Node extra : textNodes.subList(1, textNodes.size())) {
            runNode.removeChild(extra);
        }
        return true;
    }

    /**
     * Replaces a bookmark with the content of another docx document.
     *
     * <p>The siblings between the bookmark start and its {@code w:bookmarkEnd} are removed, then
     * the content document is merged in front of {@code bookmarkParentNode}, and
     * {@code bookmarkParentNode} itself is removed. The bookmark should therefore preferably sit
     * at the start of its own paragraph.
     *
     * @param docx               the host document
     * @param bookmarkParentNode the paragraph node that contains the bookmark
     * @param startBookmarkNode  the {@code w:bookmarkStart} node
     * @param contentDocx        the document whose content replaces the bookmark
     * @throws Exception if the merge fails
     */
    public void replaceDocxBookmarkFromDocx(ShanhyXWPFDocument docx, Node bookmarkParentNode, Node startBookmarkNode, ShanhyXWPFDocument contentDocx) throws Exception {
        // Step 1: delete the original bookmark content.
        Node parentNode = startBookmarkNode.getParentNode();
        Node current = startBookmarkNode.getNextSibling();
        while (current != null && !BOOKMARK_END_TAG.equals(current.getNodeName())) {
            Node following = current.getNextSibling();
            parentNode.removeChild(current);
            current = following;
        }
        // Step 2: merge the content document in place of the bookmark's paragraph.
        docx.mergeToParagraphBefore(docx, Collections.singletonList(contentDocx).iterator(), bookmarkParentNode, true);
    }

    /**
     * Returns the nearest ancestor paragraph of {@code node} as an {@link XWPFParagraph}.
     *
     * @param node the node to start from
     * @param docx the document the paragraph is created for
     * @return a paragraph parsed from the nearest ancestor {@code w:p} node
     * @throws XmlException             if the paragraph node cannot be parsed
     * @throws IllegalArgumentException if {@code node} has no ancestor {@code w:p}
     */
    public XWPFParagraph getFirstParentParagraphByNode(Node node, XWPFDocument docx) throws XmlException {
        XmlOptions options = new XmlOptions();
        options.setUseSameLocale(((DomImpl.Dom) node).locale());
        return new XWPFParagraph(CTP.Factory.parse(getFirstParentNodeByNode(node, docx), options), docx);
    }

    /**
     * Walks up from {@code node} and returns its nearest ancestor {@code w:p} node.
     *
     * @param node the node to start from
     * @param docx not used by this method
     * @return the nearest ancestor paragraph node
     * @throws IllegalArgumentException if {@code node} has no ancestor {@code w:p}
     */
    public Node getFirstParentNodeByNode(Node node, XWPFDocument docx) throws XmlException {
        for (Node ancestor = node.getParentNode(); ancestor != null; ancestor = ancestor.getParentNode()) {
            if (NODENAME_PARAGRAPH.equals(ancestor.getNodeName())) {
                return ancestor;
            }
        }
        throw new IllegalArgumentException("Node <" + node.getNodeName() + "> is not inside a " + NODENAME_PARAGRAPH + " element");
    }
}
4、BookmarkDemo.java
package org.example;
import org.apache.poi.xwpf.usermodel.TableWidthType;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
/**
 * Demo: fills the bookmarks of a template docx with text and with the content of another docx,
 * then writes the result to a new file.
 *
 * <p>Optional arguments override the default paths: {@code args[0]} template docx,
 * {@code args[1]} content docx to merge in, {@code args[2]} output docx.
 */
public class BookmarkDemo {
    private static final String DEFAULT_TEMPLATE = "D:\\Desktop\\docx\\template.docx";
    private static final String DEFAULT_CONTENT = "D:\\Desktop\\docx\\content_table2.docx";
    private static final String DEFAULT_OUTPUT = "D:\\Desktop\\docx\\out-2.docx";

    public static void main(String[] args) throws Exception {
        long start = System.currentTimeMillis();
        Path templatePath = Paths.get(args.length > 0 ? args[0] : DEFAULT_TEMPLATE);
        Path contentPath = Paths.get(args.length > 1 ? args[1] : DEFAULT_CONTENT);
        Path outputPath = Paths.get(args.length > 2 ? args[2] : DEFAULT_OUTPUT);

        ShanhyDocxBookmarkService bookmarkService = new ShanhyDocxBookmarkService();

        // To list the bookmarks of a document instead (DOM based), iterate the nodes returned by
        // bookmarkService.getBookmarksFromDocx(docx) and read the attributes
        // ShanhyDocxBookmarkService.BOOKMARK_ID_ATTR_ID / BOOKMARK_ID_ATTR_NAME of each node.

        // The output stream is opened last so that a template that cannot be read does not
        // leave an empty output file behind.
        try (InputStream contentIn = Files.newInputStream(contentPath);
             ShanhyXWPFDocument mergeContentDocx = new ShanhyXWPFDocument(contentIn);
             InputStream templateIn = Files.newInputStream(templatePath);
             ShanhyXWPFDocument templateDocx = new ShanhyXWPFDocument(templateIn);
             OutputStream out = Files.newOutputStream(outputPath)) {

            // Stretch every table of the merged document to the full width.
            mergeContentDocx.getTables().forEach(tbl -> {
                tbl.setWidthType(TableWidthType.PCT);
                tbl.setWidth("100%");
            });

            // Bookmark name -> replacement (plain text or a whole docx document).
            Map<String, Object> dataMap = new HashMap<>();
            dataMap.put("AAA", mergeContentDocx);
            dataMap.put("String书签", "单红宇");
            dataMap.put("没有内容的书签", "书签内容Hello");

            bookmarkService.replaceDocxBookmarks(templateDocx, out, dataMap);
        }
        System.out.println("耗时=" + (System.currentTimeMillis() - start) + "ms");
    }
}
测试的合并效果如下:
在编辑 Word 书签时,可以开启显示书签模式,这样可以直观地在 Word 页面中看到书签标记,如下图所示:
如果你有获取 docx 的 xml 内容的需求,则代码为:
new XWPFDocument(inputStream).getDocument().xmlText();
new XWPFDocument(inputStream).getDocument().getBody().xmlText();
其他:
1、本文因为业务需求需要使用书签方式。如果你使用的是 {{username}}、{{快递地址}} 这样的变量方式,则直接使用 poi-tl 库操作会更简单。
2、poi-tl 是一个方便操作 word 的开源项目: https://github.com/Sayi/poi-tl
(END)