依赖:
<!-- 针对2007以上版本的库 -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
<version>1.0.6</version>
</dependency>
<!-- 针对2003版本的库 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
<version>2.0.2</version>
</dependency>
工具类:
package com.hnzie.config;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
public class WordToHtmlUtil {
/**
* word2003转html
* @throws Exception
*/
public void word2003toHtm() throws Exception{
String filePath = "d:/test";
String fileName = "test.doc";
String htmlname = "test.html";
File file = new File(filePath + File.separator + fileName);
InputStream input = new FileInputStream(file);
HWPFDocument wordDocument = new HWPFDocument(input);
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
// 解析word文档
wordToHtmlConverter.processDocument(wordDocument);
Document htmlDocument = wordToHtmlConverter.getDocument();
File htmlFile = new File(filePath +File.separator+ htmlname);
OutputStream outStream = new FileOutputStream(htmlFile);
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(outStream);
TransformerFactory factory = TransformerFactory.newInstance();
Transformer serializer = factory.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
outStream.close();
}
/**
* word2007转html
* @throws Exception
*/
public void word2007toHtm() throws Exception{
String filePath = "d:/test";
String fileName = "test.doc";
String htmlname = "test.html";
File file = new File(filePath + File.separator + fileName);
InputStream in = new FileInputStream(file);
XWPFDocument document = new XWPFDocument(in);
// 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)
File imageFolderFile = new File(filePath);
XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));
options.setExtractor(new FileImageExtractor(imageFolderFile));
options.setIgnoreStylesIfUnused(false);
options.setFragment(true);
// 将 XWPFDocument转换成XHTML
File htm = new File(filePath +File.separator+ htmlname);
OutputStream out = new FileOutputStream(htm);
XHTMLConverter.getInstance().convert(document, out, options);
}
}