使用 Apache POI 实现 Word 转 HTML(Java)

本文介绍如何使用 Java 的 Apache POI 库并结合 Hutool 工具,将 Word 2003(.doc)和 Word 2007(.docx)文档转换为 HTML 格式:.doc 文档通过 WordToHtmlConverter 解析,.docx 文档通过 XHTMLConverter 解析,文档中的图片会被转为 Base64 编码并内嵌在生成的 HTML 中。
摘要由CSDN通过智能技术生成

import cn.hutool.core.img.ImgUtil;

import fr.opensagres.poi.xwpf.converter.xhtml.Base64EmbedImgManager;

import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;

import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.converter.WordToHtmlConverter;

import org.apache.poi.openxml4j.util.ZipSecureFile;

import org.apache.poi.xwpf.usermodel.XWPFDocument;

import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.OutputKeys;

import javax.xml.transform.Transformer;

import javax.xml.transform.TransformerException;

import javax.xml.transform.TransformerFactory;

import javax.xml.transform.dom.DOMSource;

import javax.xml.transform.stream.StreamResult;

import java.awt.image.BufferedImage;

import java.io.*;

/**
 * Utility for converting Word documents into standalone HTML files with Apache POI.
 *
 * <p>Both converters write their result to {@code <wordPath>/html/<wordName>.html},
 * overwrite any file already present at that location, and embed pictures in the
 * page as Base64 data URIs instead of writing separate image files.
 */
public class OfficeConvertUtil {

    /** Name of the sub-directory (below the Word file's directory) that receives the HTML output. */
    private static final String HTML_DIR_NAME = "html";

    /**
     * Converts a Word 2003 ({@code .doc}) document to an HTML file.
     *
     * <p>The source document is read from {@code wordPath + File.separator + wordName + suffix}.
     * The HTML file is regenerated on every call, even if it already exists.
     *
     * @param wordPath directory containing the Word file
     * @param wordName Word file name without extension
     * @param suffix   Word file extension including the leading dot, e.g. {@code ".doc"}
     * @return absolute path of the generated HTML file
     * @throws IOException                  if the Word file cannot be read, the output directory
     *                                      cannot be created, or the HTML file cannot be written
     * @throws TransformerException         if the HTML DOM cannot be serialized
     * @throws ParserConfigurationException if no DOM document builder can be created
     */
    public static String Word2003ToHtml(String wordPath, String wordName,
            String suffix) throws IOException, TransformerException,
            ParserConfigurationException {
        File wordFile = new File(wordPath + File.separator + wordName + suffix);
        File htmlFile = htmlFileFor(wordPath, wordName);

        Document htmlDocument;
        // Close both the raw stream and the POI document once the DOM has been built.
        try (InputStream input = new FileInputStream(wordFile);
             HWPFDocument wordDocument = new HWPFDocument(input)) {
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

            // Embed each picture directly in the page as a Base64 data URI.
            // NOTE(review): the URI carries no MIME type ("data:;base64,"), and
            // ImgUtil.toImage presumably fails for formats it cannot decode
            // (e.g. EMF/WMF) - confirm against the documents you need to support.
            wordToHtmlConverter.setPicturesManager(
                    (content, pictureType, suggestedName, widthInches, heightInches) -> {
                        BufferedImage bufferedImage = ImgUtil.toImage(content);
                        String base64Img = ImgUtil.toBase64(bufferedImage, pictureType.getExtension());
                        return "data:;base64," + base64Img;
                    });

            // Parse the Word document into the converter's HTML DOM.
            wordToHtmlConverter.processDocument(wordDocument);
            htmlDocument = wordToHtmlConverter.getDocument();
        }

        ensureDirectory(htmlFile.getParentFile());

        // Serialize the DOM; the stream is closed even when the transform fails.
        try (OutputStream outStream = new FileOutputStream(htmlFile)) {
            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
        }

        return htmlFile.getAbsolutePath();
    }

    /**
     * Converts a Word 2007 ({@code .docx}) document to an HTML file.
     *
     * <p>The source document is read from {@code wordPath + File.separator + wordName + suffix}.
     * The HTML file is regenerated on every call, even if it already exists.
     *
     * @param wordPath directory containing the Word file
     * @param wordName Word file name without extension
     * @param suffix   Word file extension including the leading dot, e.g. {@code ".docx"}
     * @return absolute path of the generated HTML file
     * @throws IOException if the Word file cannot be read, the output directory cannot be
     *                     created, or the HTML file cannot be written
     */
    public static String Word2007ToHtml(String wordPath, String wordName, String suffix)
            throws IOException {
        // SECURITY NOTE(review): this is a static, process-wide POI setting. A negative
        // ratio appears to switch off POI's zip-bomb (inflate ratio) check entirely;
        // kept for backward compatibility - do not use on untrusted documents without
        // restoring a sane limit.
        ZipSecureFile.setMinInflateRatio(-1.0d);

        File wordFile = new File(wordPath + File.separator + wordName + suffix);
        File htmlFile = htmlFileFor(wordPath, wordName);
        ensureDirectory(htmlFile.getParentFile());

        // Pictures are embedded in the page as Base64 data instead of being written to disk.
        XHTMLOptions options = XHTMLOptions.create()
                .indent(4)
                .setImageManager(new Base64EmbedImgManager());

        // Load the document, convert it, and release every resource afterwards.
        try (InputStream in = new FileInputStream(wordFile);
             XWPFDocument document = new XWPFDocument(in);
             OutputStream out = new FileOutputStream(htmlFile)) {
            XHTMLConverter.getInstance().convert(document, out, options);
        }

        return htmlFile.getAbsolutePath();
    }

    /** Builds the target file {@code <wordPath>/html/<wordName>.html}. */
    private static File htmlFileFor(String wordPath, String wordName) {
        return new File(wordPath + File.separator + HTML_DIR_NAME + File.separator
                + wordName + ".html");
    }

    /** Creates {@code dir} (and missing parents) if absent; fails loudly when that is impossible. */
    private static void ensureDirectory(File dir) throws IOException {
        // Re-check isDirectory() after mkdirs() so a concurrent creation is not reported as failure.
        if (dir != null && !dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Unable to create output directory: " + dir.getAbsolutePath());
        }
    }

    /** Manual smoke test using hard-coded sample paths. */
    public static void main(String[] args) throws Exception {
        System.out.println(Word2003ToHtml("D:\\temp\\word", "21", ".doc"));
        System.out.println(Word2007ToHtml("D:\\temp\\word", "3", ".docx"));
    }

}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值