java poi word转html_使用POI实现word转html

最新推荐文章于 2024-08-19 16:49:49 发布

Books.Fan

最新推荐文章于 2024-08-19 16:49:49 发布

阅读量442

点赞数

文章标签： java poi word转html

本文链接：https://blog.csdn.net/weixin_29526313/article/details/114068354

版权

本文介绍如何使用 Java 的 Apache POI 库，并结合 Hutool 工具，将 Word 2003（.doc）和 Word 2007（.docx）文档转换为 HTML 文件：.doc 文档由 WordToHtmlConverter 解析，.docx 文档由 XHTMLConverter 解析，文档中的图片会被转为 Base64 编码并直接内嵌在生成的 HTML 页面中。

摘要由CSDN通过智能技术生成

import cn.hutool.core.img.ImgUtil;

import fr.opensagres.poi.xwpf.converter.xhtml.Base64EmbedImgManager;

import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;

import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.converter.WordToHtmlConverter;

import org.apache.poi.openxml4j.util.ZipSecureFile;

import org.apache.poi.xwpf.usermodel.XWPFDocument;

import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.OutputKeys;

import javax.xml.transform.Transformer;

import javax.xml.transform.TransformerException;

import javax.xml.transform.TransformerFactory;

import javax.xml.transform.dom.DOMSource;

import javax.xml.transform.stream.StreamResult;

import java.awt.image.BufferedImage;

import java.io.*;

/*** office转换工具测试**/

public class OfficeConvertUtil {

/*** 将word2003转换为html文件 2017-2-27** @param wordPath word文件路径* @param wordName word文件名称无后缀* @param suffix word文件后缀* @throws IOException* @throws TransformerException* @throws ParserConfigurationException*/

public static String Word2003ToHtml(String wordPath, String wordName,

String suffix) throws IOException, TransformerException,

ParserConfigurationException {

String htmlPath = wordPath + File.separator + "html"

+ File.separator;

String htmlName = wordName + ".html";

final String imagePath = htmlPath + "image" + File.separator;

// 判断html文件是否存在，每次重新生成 File htmlFile = new File(htmlPath + htmlName);

// if (htmlFile.exists()) {// return htmlFile.getAbsolutePath();// }

// 原word文档 final String file = wordPath + File.separator + wordName + suffix;

InputStream input = new FileInputStream(new File(file));

HWPFDocument wordDocument = new HWPFDocument(input);

WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(

DocumentBuilderFactory.newInstance().newDocumentBuilder()

.newDocument());

wordToHtmlConverter.setPicturesManager((content, pictureType, suggestedName, widthInches, heightInches) -> {

BufferedImage bufferedImage = ImgUtil.toImage(content);

String base64Img = ImgUtil.toBase64(bufferedImage, pictureType.getExtension());

// 带图片的word，则将图片转为base64编码，保存在一个页面中 StringBuilder sb = (new StringBuilder(base64Img.length() + "data:;base64,".length()).append("data:;base64,").append(base64Img));

return sb.toString();

});

// 解析word文档 wordToHtmlConverter.processDocument(wordDocument);

Document htmlDocument = wordToHtmlConverter.getDocument();

// 生成html文件上级文件夹 File folder = new File(htmlPath);

if (!folder.exists()) {

folder.mkdirs();

}

// 生成html文件地址 OutputStream outStream = new FileOutputStream(htmlFile);

DOMSource domSource = new DOMSource(htmlDocument);

StreamResult streamResult = new StreamResult(outStream);

TransformerFactory factory = TransformerFactory.newInstance();

Transformer serializer = factory.newTransformer();

serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");

serializer.setOutputProperty(OutputKeys.INDENT, "yes");

serializer.setOutputProperty(OutputKeys.METHOD, "html");

serializer.transform(domSource, streamResult);

outStream.close();

return htmlFile.getAbsolutePath();

}

/*** 2007版本word转换成html 2017-2-27** @param wordPath word文件路径* @param wordName word文件名称无后缀* @param suffix word文件后缀* @return* @throws IOException*/

public static String Word2007ToHtml(String wordPath, String wordName, String suffix)

throws IOException {

ZipSecureFile.setMinInflateRatio(-1.0d);

String htmlPath = wordPath + File.separator + "html"

+ File.separator;

String htmlName = wordName + ".html";

String imagePath = htmlPath + "image" + File.separator;

// 判断html文件是否存在 File htmlFile = new File(htmlPath + htmlName);

// if (htmlFile.exists()) {// return htmlFile.getAbsolutePath();// }

// word文件 File wordFile = new File(wordPath + File.separator + wordName + suffix);

// 1) 加载word文档生成 XWPFDocument对象 InputStream in = new FileInputStream(wordFile);

XWPFDocument document = new XWPFDocument(in);

// 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录) File imgFolder = new File(imagePath);

// 带图片的word，则将图片转为base64编码，保存在一个页面中 XHTMLOptions options = XHTMLOptions.create().indent(4).setImageManager(new Base64EmbedImgManager());

// 3) 将 XWPFDocument转换成XHTML // 生成html文件上级文件夹 File folder = new File(htmlPath);

if (!folder.exists()) {

folder.mkdirs();

}

OutputStream out = new FileOutputStream(htmlFile);

XHTMLConverter.getInstance().convert(document, out, options);

return htmlFile.getAbsolutePath();

}

public static void main(String[] args) throws Exception {

System.out.println(Word2003ToHtml("D:\\temp\\word", "21", ".doc"));

System.out.println(Word2007ToHtml("D:\\temp\\word", "3", ".docx"));

}

Books.Fan

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫