<!-- Maven dependencies declared for the Word-to-HTML conversion class that follows. -->
<!-- Apache POI core artifact, version 3.14. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.14</version>
</dependency>
<!-- Presumably supplies the org.apache.poi.hwpf.* classes used for .doc input; verify against the POI docs. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.14</version>
</dependency>
<!-- Presumably supplies org.apache.poi.xwpf.usermodel.XWPFDocument used for .docx input; verify. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.14</version>
</dependency>
<!-- Presumably supplies the org.apache.poi.xwpf.converter.* classes (XHTMLConverter, XHTMLOptions, ...); verify. -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>xdocreport</artifactId>
<version>1.0.6</version>
</dependency>
<!-- NOTE(review): poi-ooxml-schemas and ooxml-schemas are both declared; they may overlap, so confirm both are needed. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.14</version>
</dependency>
<!-- Versioned independently (1.3) from the 3.14 POI artifacts above. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.3</version>
</dependency>
package com.test;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.junit.Test;
import org.w3c.dom.Document;
public class TestWord
{
// doc转换为html
void docToHtml()
throws Exception
{
String sourceFileName = "C:\\doc\\test.doc"
String targetFileName = "C:\\html\\test.html"
String imagePathStr = "C:\\html\\image\\"
HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(sourceFileName))
Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document)
// 保存图片,并返回图片的相对路径
wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
try (FileOutputStream out = new FileOutputStream(imagePathStr + name))
{
out.write(content)
}
catch (Exception e)
{
e.printStackTrace()
}
return "image/" + name
})
wordToHtmlConverter.processDocument(wordDocument)
Document htmlDocument = wordToHtmlConverter.getDocument()
DOMSource domSource = new DOMSource(htmlDocument)
StreamResult streamResult = new StreamResult(new File(targetFileName))
TransformerFactory tf = TransformerFactory.newInstance()
Transformer serializer = tf.newTransformer()
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8")
serializer.setOutputProperty(OutputKeys.INDENT, "yes")
serializer.setOutputProperty(OutputKeys.METHOD, "html")
serializer.transform(domSource, streamResult)
}
// docx转换为html
@Test
public void docxToHtml()
throws Exception
{
String sourceFileName = "D:\\帮助文档集合\\我的项目代码\\方文正13871269717--简历.docx"
String targetFileName = "E:\\ac\\test.html"
String imagePathStr = "E:\\ac\\img\\"
OutputStreamWriter outputStreamWriter = null
try
{
XWPFDocument document = new XWPFDocument(new FileInputStream(sourceFileName))
XHTMLOptions options = XHTMLOptions.create()
// 存放图片的文件夹
options.setExtractor(new FileImageExtractor(new File(imagePathStr)))
// html中图片的路径
options.URIResolver(new BasicURIResolver("img"))
outputStreamWriter = new OutputStreamWriter(new FileOutputStream(targetFileName),
"utf-8")
XHTMLConverter xhtmlConverter = (XHTMLConverter)XHTMLConverter.getInstance()
xhtmlConverter.convert(document, outputStreamWriter, options)
}
finally
{
if (outputStreamWriter != null)
{
outputStreamWriter.close()
}
}
}
}