Java将doc,docx文档中指定的标签替换为文本

Java将doc,docx文档中指定的标签替换为文本

1 需求描述

用户提供 Word 文件,需要开发一个功能:在 Word 文档的指定位置,由后台填入用户操作后输入的值。
实现思路:在 Word 待插入位置预置书签作为占位符(这种方式不影响用户的直观视觉体验),然后用 POI 读取这些书签,并将其内容替换为真实值。
本案例用的文档
在这里插入图片描述

2 最终效果展示

在这里插入图片描述

3 代码实现

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Bookmark;
import org.apache.poi.hwpf.usermodel.Bookmarks;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlOptions;
import org.apache.xmlbeans.impl.store.DomImpl;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Reads the bookmarks of doc / docx files with Apache POI and replaces their content.
 * Maven dependencies listed for this example:
 * <!-- Word bookmark dependencies: start -->
	<dependency>
	  <groupId>commons-io</groupId>
	  <artifactId>commons-io</artifactId>
	  <version>2.9.0</version>
	</dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>5.2.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-scratchpad</artifactId>
        <version>5.2.2</version>
    </dependency>
    <dependency>
        <groupId>com.deepoove</groupId>
        <artifactId>poi-tl</artifactId>
        <version>1.12.0</version>
    </dependency>
	<!-- Word bookmark dependencies: end -->
 */
public class TestWordBookmarkToString{
	/**
	 * Demo entry point: fills the bookmarks {@code demo1} and {@code demo2} of a fixed
	 * input file and writes the result to a new, time-stamped output file.
	 *
	 * <p>Both file streams are opened in a try-with-resources block so they are released
	 * on every path, including when the replacement throws.
	 *
	 * @param args ignored
	 * @throws Exception if the input cannot be read, the output cannot be written,
	 *                   or the document cannot be processed
	 */
	public static void main(String[] args) throws Exception {
		final long start = System.currentTimeMillis();
		final LocalTime time = LocalTime.now();
		// File extension of the sample document; it also selects the doc / docx code path below.
		final String attr = ".docx";
		final String inPath = "F:\\BiaoQian" + attr;
		final String outPath = "F:\\BiaoQianNew" + time.format(DateTimeFormatter.ofPattern("HH时mm分ss秒SSS")) + attr;
		// Whether successfully replaced bookmarks are removed from the document afterwards.
		final boolean delBookMark = true;
		// Bookmark name -> replacement text; identical for both formats.
		final Map<String, String> map = new HashMap<>();
		map.put("demo1", "张三");
		map.put("demo2", "男");
		try (InputStream inputStream = Files.newInputStream(Paths.get(inPath));
				OutputStream outputStream = Files.newOutputStream(Paths.get(outPath))) {
			if (inPath.endsWith("docx")) {
				docxOperate(inputStream, outputStream, map, delBookMark);
			} else {
				// docOperate closes the streams itself; closing them again here is harmless.
				docOperate(inputStream, outputStream, map, delBookMark);
			}
		}
		System.out.println("执行完成,耗时" + (System.currentTimeMillis() - start)/1000 + "秒");
	}
	/**
	 * Fills bookmarks of a binary {@code .doc} document with text.
	 *
	 * <p>For every bookmark whose name is a key of {@code dataMap} (with a non-null value),
	 * the value is inserted directly after the bookmark's range. Optionally the bookmarks
	 * that were filled are removed afterwards. The result is written to {@code outputStream}.
	 *
	 * <p>The document and both streams are closed when this method returns, whether it
	 * completes normally or throws.
	 *
	 * @param inputStream  source of the {@code .doc} file; closed by this method
	 * @param outputStream destination of the modified document; closed by this method
	 * @param dataMap      bookmark name to replacement text; entries with a {@code null} value are skipped
	 * @param delBookMark  {@code true} to remove every bookmark that received a value
	 * @throws IOException if the document cannot be read or written
	 */
	public static void docOperate(InputStream inputStream,OutputStream outputStream,
			Map<String,String> dataMap,boolean delBookMark) throws IOException {
		// Resources are closed in reverse declaration order: document, output, input.
		try (InputStream in = inputStream;
				OutputStream out = outputStream;
				HWPFDocument document = new HWPFDocument(in)) {
			final Bookmarks bookmarks = document.getBookmarks();
			final int no = bookmarks.getBookmarksCount();
			final Set<String> hasChange = new HashSet<String>();
			for (int dwI = 0; dwI < no; dwI++) {
				final Bookmark bookmark = bookmarks.getBookmark(dwI);
				final String mark = bookmark.getName();
				final String repValue = dataMap.get(mark);
				if (repValue == null) {
					// No value for this bookmark (absent key or null value): leave it untouched.
					continue;
				}
				hasChange.add(mark);
				System.out.println("执行替换:"+mark+"--->"+repValue);
				final Range range = new Range(bookmark.getStart(), bookmark.getEnd(), document);
				// insertAfter is used deliberately; the original author found replaceText unreliable here.
				range.insertAfter(repValue);
			}
			if (delBookMark) {
				System.out.println("执行已经替换的书签的移除");
				// Walk backwards so removing an entry does not shift the indices still to be visited.
				for (int dwI = bookmarks.getBookmarksCount() - 1; dwI >= 0; dwI--) {
					if (hasChange.contains(bookmarks.getBookmark(dwI).getName())) {
						bookmarks.remove(dwI);
					}
				}
			}
			document.write(out);
		}
	}
	/**
	 * Fills bookmarks of a {@code .docx} document with text.
	 *
	 * <p>Every {@code w:bookmarkStart} found under the document body whose {@code w:name}
	 * is a key of {@code dataMap} (with a non-null value) has its content replaced by that
	 * value. Optionally the bookmarks that were filled are removed afterwards, start and
	 * end marker together. The result is written to {@code outputStream}.
	 *
	 * <p>The document is closed on every path. The two streams are NOT closed here; they
	 * remain the caller's responsibility.
	 *
	 * @param inputStream  source of the {@code .docx} file
	 * @param outputStream destination of the modified document
	 * @param dataMap      bookmark name to replacement text; entries with a {@code null} value are skipped
	 * @param delBookMark  {@code true} to remove every bookmark that received a value
	 * @throws Exception if the document cannot be read, processed or written
	 */
	public static void docxOperate(InputStream inputStream,OutputStream outputStream,Map<String,String> dataMap,boolean delBookMark) throws Exception {
		final Set<String> hasChange = new HashSet<String>();
		try (XWPFDocument docx = new XWPFDocument(inputStream)) {
			final Node bodyNode = docx.getDocument().getBody().getDomNode();
			// Collect the start markers, e.g. <w:bookmarkStart w:id="1" w:name="bookmark name"/>.
			final List<Node> bookmarkNodeList = new ArrayList<>();
			getBookmarksFromNode(bodyNode, bookmarkNodeList, false);
			for (Node startBookmarkNode : bookmarkNodeList) {
				final Node nameAttr = startBookmarkNode.getAttributes().getNamedItem("w:name");
				if (nameAttr == null) {
					continue;
				}
				final String bookmarkName = nameAttr.getNodeValue();
				final String data = dataMap.get(bookmarkName);
				if (data == null) {
					// No value for this bookmark (absent key or null value): leave it untouched.
					continue;
				}
				System.out.println("执行替换:"+bookmarkName+"--->"+data);
				hasChange.add(bookmarkName);
				replaceDocxBookmarkFromString(getFirstParentParagraphByNode(startBookmarkNode, docx), startBookmarkNode, data);
			}
			if (delBookMark) {
				System.out.println("执行已经替换的书签的移除");
				removeReplacedDocxBookmarks(bodyNode, hasChange);
			}
			docx.write(outputStream);
		}
	}

	/**
	 * Removes the start and end markers of every bookmark whose name is in {@code replacedNames}.
	 *
	 * <p>Start markers are matched by {@code w:name}; the corresponding end markers carry
	 * no name and are matched through the {@code w:id} of the removed start markers.
	 */
	private static void removeReplacedDocxBookmarks(Node bodyNode, Set<String> replacedNames) {
		final List<Node> markers = new ArrayList<>();
		getBookmarksFromNode(bodyNode, markers, true);
		final Set<String> removedIds = new HashSet<String>();
		// Pass 1: remove matching start markers and remember their ids.
		for (Node marker : markers) {
			if (!BOOKMARK_START_TAG.equals(marker.getNodeName())) {
				continue;
			}
			final Node nameAttr = marker.getAttributes().getNamedItem("w:name");
			if (nameAttr == null || !replacedNames.contains(nameAttr.getNodeValue())) {
				continue;
			}
			final Node idAttr = marker.getAttributes().getNamedItem(BOOKMARK_ID_ATTR_NAME);
			if (idAttr != null) {
				removedIds.add(idAttr.getNodeValue());
			}
			marker.getParentNode().removeChild(marker);
		}
		// Pass 2: remove the end markers that belonged to the removed start markers.
		for (Node marker : markers) {
			if (!BOOKMARK_END_TAG.equals(marker.getNodeName())) {
				continue;
			}
			final Node idAttr = marker.getAttributes().getNamedItem(BOOKMARK_ID_ATTR_NAME);
			if (idAttr != null && removedIds.contains(idAttr.getNodeValue())) {
				marker.getParentNode().removeChild(marker);
			}
		}
	}

	// Qualified names (with the "w:" prefix) of the WordprocessingML elements and
	// attributes that the docx handling in this class works with.

	/** Run element. */
	public static final String RUN_NODE_NAME = "w:r";
	/** Text element inside a run; it holds the actual text content. */
	public static final String TEXT_NODE_NAME = "w:t";
	/** Marker element that opens a bookmark. */
	public static final String BOOKMARK_START_TAG = "w:bookmarkStart";
	/** Marker element that closes a bookmark. */
	public static final String BOOKMARK_END_TAG = "w:bookmarkEnd";
	/** Id attribute of a bookmark marker. */
	public static final String BOOKMARK_ID_ATTR_NAME = "w:id";
	/** Run properties (style) element. */
	public static final String STYLE_NODE_NAME = "w:rPr";
	/** Paragraph properties element. */
	public static final String PARAGRAPH_PROPERTIES_NAME = "w:pPr";
	/** Paragraph element. */
	public static final String NODENAME_PARAGRAPH = "w:p";
	/**
	 * Walks the DOM subtree rooted at {@code node} depth-first and appends the bookmark
	 * markers it finds to {@code bookmarkNodeList}, in document order.
	 *
	 * <p>Start markers are always collected; end markers only when {@code isAll} is set.
	 * Paragraph-properties subtrees are skipped entirely, and the children of a bookmark
	 * marker itself are not visited.
	 *
	 * @param node             root of the subtree to scan
	 * @param bookmarkNodeList receives the matching marker nodes
	 * @param isAll            {@code true} to collect end markers in addition to start markers
	 */
	private static void getBookmarksFromNode(Node node, List<Node> bookmarkNodeList, boolean isAll) {
		final String nodeName = node.getNodeName();
		if (BOOKMARK_START_TAG.equals(nodeName)) {
			bookmarkNodeList.add(node);
			return;
		}
		if (BOOKMARK_END_TAG.equals(nodeName)) {
			if (isAll) {
				bookmarkNodeList.add(node);
			}
			return;
		}
		if (PARAGRAPH_PROPERTIES_NAME.equals(nodeName)) {
			// Paragraph properties are not searched for bookmarks.
			return;
		}
		// Any other node: descend into each child in order.
		for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
			getBookmarksFromNode(child, bookmarkNodeList, isAll);
		}
	}
	/**
	 * Replaces the content enclosed by a docx bookmark with plain text.
	 *
	 * <p>The siblings following {@code startBookmarkNode} are visited up to (not including)
	 * the first {@code w:bookmarkEnd} sibling, or to the end of the parent if there is none.
	 * The first run that contains a text element is kept and its text is overwritten, which
	 * preserves that run's formatting. Every other visited sibling is removed. If no run
	 * could be reused, a new run carrying {@code content} is inserted right after the start
	 * marker.
	 *
	 * <p>NOTE(review): if the bookmark's end marker is not a sibling of the start marker
	 * (for example a bookmark spanning several paragraphs), everything after the start
	 * marker in its parent is replaced; confirm this is acceptable for the documents in use.
	 *
	 * @param bookmarkParentParagraph paragraph wrapper used to create a new run when none can be reused
	 * @param startBookmarkNode       the {@code w:bookmarkStart} node of the bookmark
	 * @param content                 the replacement text
	 */
	private static void replaceDocxBookmarkFromString(XWPFParagraph bookmarkParentParagraph, Node startBookmarkNode, String content) {
		final Node container = startBookmarkNode.getParentNode();
		boolean contentReplaced = false;
		Node current = startBookmarkNode.getNextSibling();
		while (current != null && !BOOKMARK_END_TAG.equals(current.getNodeName())) {
			// Fetch the successor before touching the node: a detached node no longer
			// reports its former siblings, which would end the walk prematurely.
			final Node following = current.getNextSibling();
			final boolean candidateRun = !contentReplaced && RUN_NODE_NAME.equals(current.getNodeName());
			if (candidateRun && writeTextIntoRun(current, content)) {
				contentReplaced = true;
			} else {
				container.removeChild(current);
			}
			current = following;
		}
		if (!contentReplaced) {
			// Nothing to reuse (empty bookmark or no run with text): add a fresh run after the start marker.
			final XWPFRun run = bookmarkParentParagraph.createRun();
			run.setText(content);
			final Node newChildNode = run.getCTR().getDomNode();
			final Node startBookmarkNextNode = startBookmarkNode.getNextSibling();
			if (startBookmarkNextNode == null) {
				container.appendChild(newChildNode);
			} else {
				container.insertBefore(newChildNode, startBookmarkNextNode);
			}
		}
	}

	/**
	 * Writes {@code content} into the first text element of a run and drops any further
	 * text elements of that run, so the content appears exactly once.
	 *
	 * @return {@code true} if the run had a text element and the content was written,
	 *         {@code false} if the run contains no text element and was left unchanged
	 */
	private static boolean writeTextIntoRun(Node runNode, String content) {
		boolean written = false;
		Node child = runNode.getFirstChild();
		while (child != null) {
			final Node following = child.getNextSibling();
			if (TEXT_NODE_NAME.equals(child.getNodeName())) {
				if (written) {
					runNode.removeChild(child);
				} else {
					final Node textValue = child.getFirstChild();
					if (textValue == null) {
						// Empty text element: it has no text node yet, so create one.
						child.appendChild(runNode.getOwnerDocument().createTextNode(content));
					} else {
						textValue.setNodeValue(content);
					}
					written = true;
				}
			}
			child = following;
		}
		return written;
	}
	/**
	 * Builds an {@link XWPFParagraph} for the nearest {@code w:p} ancestor of {@code node}.
	 *
	 * <p>The ancestor is parsed into a {@code CTP} with the XMLBeans locale of {@code node}.
	 * NOTE(review): presumably sharing the locale is what allows nodes created through the
	 * returned paragraph to be inserted into the original DOM; confirm against the
	 * XMLBeans documentation.
	 *
	 * @param node a DOM node inside the document; must be an XMLBeans DOM node
	 * @param docx the document the paragraph wrapper is attached to
	 * @return a paragraph wrapper for the enclosing paragraph
	 * @throws XmlException if the paragraph node cannot be parsed
	 */
	private static XWPFParagraph getFirstParentParagraphByNode(Node node, XWPFDocument docx) throws XmlException {
		final XmlOptions parseOptions = new XmlOptions();
		final DomImpl.Dom xmlBeansNode = (DomImpl.Dom) node;
		parseOptions.setUseSameLocale(xmlBeansNode.locale());
		final Node paragraphNode = getFirstParentNodeByNode(node, docx);
		final CTP paragraphXml = CTP.Factory.parse(paragraphNode, parseOptions);
		return new XWPFParagraph(paragraphXml, docx);
	}
	/**
	 * Climbs the ancestor chain of {@code node} and returns the first ancestor that is a
	 * paragraph element. The node itself is never returned, only a strict ancestor.
	 *
	 * <p>No check is made for running out of ancestors: if none of them is a paragraph,
	 * the climb ends in a {@link NullPointerException}.
	 *
	 * @param node the node whose enclosing paragraph is wanted
	 * @param docx unused
	 * @return the nearest paragraph ancestor
	 */
	private static Node getFirstParentNodeByNode(Node node, XWPFDocument docx) throws XmlException {
		Node ancestor = node.getParentNode();
		while (!NODENAME_PARAGRAPH.equals(ancestor.getNodeName())) {
			ancestor = ancestor.getParentNode();
		}
		return ancestor;
	}
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值