Java 将 doc、docx 文档中指定的书签替换为文本
1 需求描述
用户提供 Word 文件,需要开发一个功能:由后台把用户操作后输入的值写入 Word 文档中的指定位置。
实现思路:在 Word 中待插入的位置预先设置书签作为占位符(这种方式不影响用户的直观视觉体验),然后用 POI 读取这些书签,并将其替换为真实值。
本案例用的文档
2 最终效果展示
3 代码实现
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Bookmark;
import org.apache.poi.hwpf.usermodel.Bookmarks;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlOptions;
import org.apache.xmlbeans.impl.store.DomImpl;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Uses Apache POI to read the bookmarks of a Word document ({@code .doc} through HWPF,
 * {@code .docx} through XWPF) and to put replacement text at selected bookmarks.
 *
 * <p>Maven dependencies listed by the original author. This class itself only imports
 * POI HWPF/XWPF, XMLBeans and the generated OOXML schema types; commons-io and poi-tl are
 * not referenced by the code below.
 *
 * <pre>{@code
 * <!-- Word bookmark support: start -->
 * <dependency>
 *     <groupId>commons-io</groupId>
 *     <artifactId>commons-io</artifactId>
 *     <version>2.9.0</version>
 * </dependency>
 * <dependency>
 *     <groupId>org.apache.poi</groupId>
 *     <artifactId>poi-ooxml</artifactId>
 *     <version>5.2.2</version>
 * </dependency>
 * <dependency>
 *     <groupId>org.apache.poi</groupId>
 *     <artifactId>poi-scratchpad</artifactId>
 *     <version>5.2.2</version>
 * </dependency>
 * <dependency>
 *     <groupId>com.deepoove</groupId>
 *     <artifactId>poi-tl</artifactId>
 *     <version>1.12.0</version>
 * </dependency>
 * <!-- Word bookmark support: end -->
 * }</pre>
 */
public class TestWordBookmarkToString {

    /** Constants for the WordprocessingML element and attribute names used by the docx code. */
    public static final String RUN_NODE_NAME = "w:r";
    public static final String TEXT_NODE_NAME = "w:t";
    public static final String BOOKMARK_START_TAG = "w:bookmarkStart";
    public static final String BOOKMARK_END_TAG = "w:bookmarkEnd";
    public static final String BOOKMARK_ID_ATTR_NAME = "w:id";
    public static final String STYLE_NODE_NAME = "w:rPr";
    public static final String PARAGRAPH_PROPERTIES_NAME = "w:pPr";
    public static final String NODENAME_PARAGRAPH = "w:p";

    /** Attribute of {@code w:bookmarkStart} that carries the bookmark name. */
    private static final String BOOKMARK_NAME_ATTR_NAME = "w:name";

    /**
     * Demo entry point: replaces the bookmarks {@code demo1} and {@code demo2} of a hard-coded
     * input file and writes the result to a time-stamped output file next to it.
     *
     * @param args ignored
     * @throws Exception if the files cannot be read, processed or written
     */
    public static void main(String[] args) throws Exception {
        final long start = System.currentTimeMillis();
        final LocalTime time = LocalTime.now();

        final String attr = ".docx";
        final String inPath = "F:\\BiaoQian" + attr;
        final String outPath = "F:\\BiaoQianNew"
                + time.format(DateTimeFormatter.ofPattern("HH时mm分ss秒SSS")) + attr;
        // Whether bookmarks that were replaced are removed from the document afterwards.
        final boolean delBookMark = true;

        final Map<String, String> map = new HashMap<>();
        map.put("demo1", "张三");
        map.put("demo2", "男");

        // try-with-resources: the streams are released even when processing fails.
        // docOperate also closes them itself; closing twice is harmless.
        try (InputStream inputStream = Files.newInputStream(Paths.get(inPath));
             OutputStream outputStream = Files.newOutputStream(Paths.get(outPath))) {
            if (inPath.endsWith("docx")) {
                docxOperate(inputStream, outputStream, map, delBookMark);
            } else {
                docOperate(inputStream, outputStream, map, delBookMark);
            }
        }
        System.out.println("执行完成,耗时" + (System.currentTimeMillis() - start) / 1000 + "秒");
    }

    /**
     * Bookmark replacement for binary {@code .doc} files.
     *
     * <p>For every bookmark whose name is a key of {@code dataMap} with a non-null value, the
     * value is inserted directly after the bookmark's range ({@code Range.insertAfter}); text
     * already inside the range is left in place. Both streams are closed by this method, on
     * success and on failure.
     *
     * @param inputStream  source {@code .doc} document; closed by this method
     * @param outputStream destination for the modified document; closed by this method
     * @param dataMap      bookmark name to replacement text; entries with a {@code null} value
     *                     are skipped
     * @param delBookMark  {@code true} to remove the bookmarks that were replaced
     * @throws IOException if the document cannot be read or written
     */
    public static void docOperate(InputStream inputStream, OutputStream outputStream,
            Map<String, String> dataMap, boolean delBookMark) throws IOException {
        // Resources are closed in reverse order: document, output stream, input stream.
        try (InputStream in = inputStream;
             OutputStream out = outputStream;
             HWPFDocument document = new HWPFDocument(in)) {
            final Bookmarks bookmarks = document.getBookmarks();
            final int count = bookmarks.getBookmarksCount();
            final Set<String> hasChange = new HashSet<>();

            for (int i = 0; i < count; i++) {
                final Bookmark bookmark = bookmarks.getBookmark(i);
                final String mark = bookmark.getName();
                final String repValue = dataMap.get(mark);
                if (repValue == null) {
                    // No entry, or a null value: nothing to insert (matches the docx path).
                    continue;
                }
                hasChange.add(mark);
                System.out.println("执行替换:" + mark + "--->" + repValue);
                final Range range = new Range(bookmark.getStart(), bookmark.getEnd(), document);
                range.insertAfter(repValue);
            }

            if (delBookMark) {
                System.out.println("执行已经替换的书签的移除");
                // Walk backwards so that removing an entry does not shift the indexes
                // that are still to be visited.
                for (int i = bookmarks.getBookmarksCount() - 1; i >= 0; i--) {
                    if (hasChange.contains(bookmarks.getBookmark(i).getName())) {
                        bookmarks.remove(i);
                    }
                }
            }
            document.write(out);
        }
    }

    /**
     * Bookmark replacement for {@code .docx} files.
     *
     * <p>Scans the document body for {@code w:bookmarkStart} elements. For each one whose name
     * is a key of {@code dataMap} with a non-null value, the bookmarked content is replaced by
     * that value. Unlike {@link #docOperate}, this method does not close the two streams; that
     * stays the caller's responsibility.
     *
     * @param inputStream  source {@code .docx} document
     * @param outputStream destination for the modified document
     * @param dataMap      bookmark name to replacement text; entries with a {@code null} value
     *                     are skipped
     * @param delBookMark  {@code true} to remove the start and end markers of the bookmarks
     *                     that were replaced
     * @throws Exception if the document cannot be read, modified or written
     */
    public static void docxOperate(InputStream inputStream, OutputStream outputStream,
            Map<String, String> dataMap, boolean delBookMark) throws Exception {
        try (XWPFDocument docx = new XWPFDocument(inputStream)) {
            final Node bodyNode = docx.getDocument().getBody().getDomNode();

            // Collect the <w:bookmarkStart w:id="1" w:name="..."/> elements of the body.
            final List<Node> bookmarkNodeList = new ArrayList<>();
            getBookmarksFromNode(bodyNode, bookmarkNodeList, false);

            final Set<String> hasChange = new HashSet<>();
            for (Node startBookmarkNode : bookmarkNodeList) {
                if (startBookmarkNode.getParentNode() == null) {
                    // Detached while an enclosing bookmark's content was replaced.
                    continue;
                }
                final String bookmarkName = attributeValue(startBookmarkNode, BOOKMARK_NAME_ATTR_NAME);
                if (bookmarkName == null) {
                    continue;
                }
                final String data = dataMap.get(bookmarkName);
                if (data == null) {
                    continue;
                }
                System.out.println("执行替换:" + bookmarkName + "--->" + data);
                hasChange.add(bookmarkName);
                replaceDocxBookmarkFromString(docx, startBookmarkNode, data);
            }

            if (delBookMark) {
                System.out.println("执行已经替换的书签的移除");
                removeReplacedBookmarks(bodyNode, hasChange);
            }
            docx.write(outputStream);
        }
    }

    /**
     * Removes the {@code w:bookmarkStart} elements whose name is in {@code replacedNames}
     * together with the {@code w:bookmarkEnd} elements that carry the same {@code w:id}, so
     * that no end marker is left behind without its start marker.
     *
     * @param bodyNode      DOM node of the document body
     * @param replacedNames names of the bookmarks to remove
     */
    private static void removeReplacedBookmarks(Node bodyNode, Set<String> replacedNames) {
        final List<Node> bookmarkNodes = new ArrayList<>();
        getBookmarksFromNode(bodyNode, bookmarkNodes, true);

        final Set<String> removedIds = new HashSet<>();
        for (Node bookmarkNode : bookmarkNodes) {
            if (!BOOKMARK_START_TAG.equals(bookmarkNode.getNodeName())) {
                continue;
            }
            // Diagnostic output kept from the original implementation.
            System.out.println(bookmarkNode.getNodeName());
            final String bookmarkName = attributeValue(bookmarkNode, BOOKMARK_NAME_ATTR_NAME);
            if (bookmarkName != null && replacedNames.contains(bookmarkName)) {
                final String id = attributeValue(bookmarkNode, BOOKMARK_ID_ATTR_NAME);
                if (id != null) {
                    removedIds.add(id);
                }
                bookmarkNode.getParentNode().removeChild(bookmarkNode);
            }
        }
        // Second pass: end markers are matched to their start marker by id only.
        for (Node bookmarkNode : bookmarkNodes) {
            if (BOOKMARK_END_TAG.equals(bookmarkNode.getNodeName())
                    && removedIds.contains(attributeValue(bookmarkNode, BOOKMARK_ID_ATTR_NAME))) {
                bookmarkNode.getParentNode().removeChild(bookmarkNode);
            }
        }
    }

    /**
     * Collects bookmark marker elements below {@code node} in document order.
     *
     * <p>{@code w:bookmarkStart} elements are always collected; {@code w:bookmarkEnd} elements
     * only when {@code isAll} is {@code true}. The children of {@code w:pPr} elements are not
     * visited.
     *
     * @param node             node to start from
     * @param bookmarkNodeList list that receives the matching nodes
     * @param isAll            {@code true} to collect end markers as well as start markers
     */
    private static void getBookmarksFromNode(Node node, List<Node> bookmarkNodeList, boolean isAll) {
        final String nodeName = node.getNodeName();
        if (BOOKMARK_START_TAG.equals(nodeName)) {
            bookmarkNodeList.add(node);
            return;
        }
        if (BOOKMARK_END_TAG.equals(nodeName)) {
            if (isAll) {
                bookmarkNodeList.add(node);
            }
            return;
        }
        if (PARAGRAPH_PROPERTIES_NAME.equals(nodeName)) {
            return;
        }
        final NodeList childNodes = node.getChildNodes();
        for (int i = 0, j = childNodes.getLength(); i < j; i++) {
            getBookmarksFromNode(childNodes.item(i), bookmarkNodeList, isAll);
        }
    }

    /**
     * Replaces the content enclosed by one bookmark with {@code content}.
     *
     * <p>The siblings following the start marker are scanned up to the {@code w:bookmarkEnd}
     * with the same {@code w:id}. The text of the first run that has a {@code w:t} element is
     * overwritten, which keeps that run's formatting. When the matching end marker is a sibling
     * of the start marker, every other node in between is removed, except the start/end markers
     * of other bookmarks. When it is not a sibling, nothing is removed, so content outside the
     * bookmark cannot be deleted by accident. If no run text could be overwritten, a new run
     * holding {@code content} is inserted directly after the start marker.
     *
     * @param docx              document that owns the bookmark
     * @param startBookmarkNode the {@code w:bookmarkStart} element; must still be attached
     * @param content           replacement text
     * @throws XmlException if the enclosing paragraph cannot be re-parsed to create the new run
     */
    private static void replaceDocxBookmarkFromString(XWPFDocument docx, Node startBookmarkNode,
            String content) throws XmlException {
        final Node parent = startBookmarkNode.getParentNode();
        final String bookmarkId = attributeValue(startBookmarkNode, BOOKMARK_ID_ATTR_NAME);
        final boolean endIsSibling = hasClosingSibling(startBookmarkNode, bookmarkId);

        boolean contentReplaced = false;
        Node current = startBookmarkNode.getNextSibling();
        while (current != null && !isClosingNode(current, bookmarkId)) {
            // Read the successor first: a node that has been removed no longer has siblings.
            final Node following = current.getNextSibling();
            final String nodeName = current.getNodeName();
            if (BOOKMARK_START_TAG.equals(nodeName) || BOOKMARK_END_TAG.equals(nodeName)) {
                // Marker of a different bookmark: leave it where it is.
            } else if (!contentReplaced && RUN_NODE_NAME.equals(nodeName)
                    && writeRunText(current, content)) {
                contentReplaced = true;
            } else if (endIsSibling) {
                parent.removeChild(current);
            }
            current = following;
        }

        if (!contentReplaced) {
            final XWPFRun run = getFirstParentParagraphByNode(startBookmarkNode, docx).createRun();
            run.setText(content);
            final Node newRunNode = run.getCTR().getDomNode();
            final Node anchor = startBookmarkNode.getNextSibling();
            if (anchor == null) {
                parent.appendChild(newRunNode);
            } else {
                parent.insertBefore(newRunNode, anchor);
            }
        }
    }

    /**
     * Tells whether {@code node} is the end marker that closes the bookmark with the given id.
     * When the start marker has no id, any {@code w:bookmarkEnd} is accepted.
     */
    private static boolean isClosingNode(Node node, String bookmarkId) {
        if (!BOOKMARK_END_TAG.equals(node.getNodeName())) {
            return false;
        }
        return bookmarkId == null
                || bookmarkId.equals(attributeValue(node, BOOKMARK_ID_ATTR_NAME));
    }

    /** Tells whether the closing end marker is one of the siblings after the start marker. */
    private static boolean hasClosingSibling(Node startBookmarkNode, String bookmarkId) {
        for (Node node = startBookmarkNode.getNextSibling(); node != null; node = node.getNextSibling()) {
            if (isClosingNode(node, bookmarkId)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Writes {@code content} into the first {@code w:t} child of a run and removes any further
     * {@code w:t} children so the text is not duplicated.
     *
     * @return {@code true} if the run had a {@code w:t} child and the text was written
     */
    private static boolean writeRunText(Node runNode, String content) {
        Node firstText = null;
        final List<Node> surplus = new ArrayList<>();
        final NodeList children = runNode.getChildNodes();
        for (int i = 0, j = children.getLength(); i < j; i++) {
            final Node child = children.item(i);
            if (TEXT_NODE_NAME.equals(child.getNodeName())) {
                if (firstText == null) {
                    firstText = child;
                } else {
                    surplus.add(child);
                }
            }
        }
        if (firstText == null) {
            return false;
        }
        final Node textValue = firstText.getFirstChild();
        if (textValue == null) {
            // Empty <w:t/>: there is no text node to overwrite, so add one.
            firstText.appendChild(firstText.getOwnerDocument().createTextNode(content));
        } else {
            textValue.setNodeValue(content);
        }
        // Removal happens after the scan so the child list is not modified while iterating it.
        for (Node extra : surplus) {
            runNode.removeChild(extra);
        }
        return true;
    }

    /** Returns the value of the named attribute, or {@code null} if the node does not have it. */
    private static String attributeValue(Node node, String attributeName) {
        if (node.getAttributes() == null) {
            return null;
        }
        final Node attribute = node.getAttributes().getNamedItem(attributeName);
        return attribute == null ? null : attribute.getNodeValue();
    }

    /**
     * Builds an {@link XWPFParagraph} from the nearest {@code w:p} ancestor of {@code node}.
     *
     * <p>The paragraph XML is parsed with the XMLBeans locale of {@code node}
     * ({@code setUseSameLocale}). NOTE(review): this presumably yields a paragraph object that
     * is separate from the live document tree but shares its locale, which is what allows a run
     * created on it to be moved into the live tree afterwards; confirm against the XMLBeans
     * documentation. {@code DomImpl} is an XMLBeans implementation class, not public API.
     *
     * @throws XmlException          if the paragraph node cannot be parsed
     * @throws IllegalStateException if {@code node} has no {@code w:p} ancestor
     */
    private static XWPFParagraph getFirstParentParagraphByNode(Node node, XWPFDocument docx) throws XmlException {
        final XmlOptions options = new XmlOptions();
        options.setUseSameLocale(((DomImpl.Dom) node).locale());
        return new XWPFParagraph(CTP.Factory.parse(getFirstParentNodeByNode(node), options), docx);
    }

    /**
     * Walks up from {@code node} and returns its nearest {@code w:p} ancestor.
     *
     * @throws IllegalStateException if no ancestor is a {@code w:p} element
     */
    private static Node getFirstParentNodeByNode(Node node) {
        Node ancestor = node.getParentNode();
        while (ancestor != null && !NODENAME_PARAGRAPH.equals(ancestor.getNodeName())) {
            ancestor = ancestor.getParentNode();
        }
        if (ancestor == null) {
            throw new IllegalStateException(
                    "Node " + node.getNodeName() + " is not inside a " + NODENAME_PARAGRAPH + " element");
        }
        return ancestor;
    }
}