使用前请先在项目的 pom.xml 中引入以下 Maven 依赖:
<!-- 注意:poi、poi-ooxml、poi-scratchpad 三个依赖的版本必须保持一致 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
<!-- 操作doc ppt xls -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.2</version>
</dependency>
<!-- 操作docx pptx xlsx -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<!--httpclient依赖-->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.3</version>
</dependency>
<dependency>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
<version>3.1</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
<version>2.0.2</version>
</dependency>
package com.openness.config;
import fr.opensagres.poi.xwpf.converter.xhtml.Base64EmbedImgManager;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
/**
 * Converts Word documents that are reachable through a URL into HTML markup.
 *
 * <p>Legacy binary {@code .doc} files are converted with Apache POI's
 * {@link WordToHtmlConverter}; OOXML {@code .docx} files are converted with
 * XDocReport's {@link XHTMLConverter}. The remote document is first copied to a
 * temporary file, which is removed again once the conversion has finished.
 */
public class WordToHtmlUtil {

    /** Extension (including the leading dot) of legacy binary Word documents. */
    private static final String DOC_EXTENSION = ".doc";

    /** Extension (including the leading dot) of OOXML Word documents. */
    private static final String DOCX_EXTENSION = ".docx";

    /**
     * Downloads the Word document located at {@code filePath} and converts it to HTML.
     *
     * <p>The document type is taken from the text after the last {@code '.'} of
     * {@code filePath}; when that is not a Word extension (for example because the URL
     * carries a query string) the URL path is inspected instead. The comparison is
     * case-insensitive.
     *
     * @param filePath URL of the document, in a form accepted by {@link URL#URL(String)}
     * @return the generated HTML, or the literal {@code "文件格式错误"} when the URL does
     *         not point to a {@code .doc} or {@code .docx} file
     * @throws IOException      if {@code filePath} is not a valid URL or the document
     *                          cannot be downloaded
     * @throws RuntimeException wrapping any failure raised while converting the document
     */
    public static String getWordToHtml(String filePath) throws IOException {
        URL url = new URL(filePath);

        // Validate the format before downloading so unsupported files cost no network I/O.
        String extension = detectExtension(filePath, url);
        boolean legacyDoc = DOC_EXTENSION.equalsIgnoreCase(extension);
        if (!legacyDoc && !DOCX_EXTENSION.equalsIgnoreCase(extension)) {
            return "文件格式错误";
        }

        File file = urlToFile(url);
        if (file == null) {
            // urlToFile signals a failed download with null; report it explicitly instead
            // of relying on an assert that is disabled in normal runs.
            throw new IOException(
                    "Failed to download Word document from " + url.getHost() + url.getPath());
        }

        try {
            return legacyDoc ? convertDoc(file) : convertDocx(file);
        } finally {
            // The temporary copy is only needed for the conversion; clean up best-effort.
            if (!file.delete()) {
                file.deleteOnExit();
            }
        }
    }

    /**
     * Copies the content behind {@code url} into a newly created temporary file.
     *
     * <p>The caller owns the returned file and is responsible for deleting it.
     *
     * @param url location to read from
     * @return the temporary file holding the downloaded bytes, or {@code null} if the
     *         file could not be created or the download failed
     */
    public static File urlToFile(URL url) {
        File file = null;
        try {
            file = File.createTempFile("tmp", null);
            URLConnection urlConn = url.openConnection();
            try (InputStream is = urlConn.getInputStream();
                 FileOutputStream fos = new FileOutputStream(file)) {
                byte[] buffer = new byte[4096];
                int length;
                // read() returns -1 at end of stream.
                while ((length = is.read(buffer)) != -1) {
                    fos.write(buffer, 0, length);
                }
            }
            return file;
        } catch (IOException e) {
            // Keep the historical "null on failure" contract, but do not leave a
            // partially written temporary file behind.
            if (file != null) {
                file.delete();
            }
            return null;
        }
    }

    /**
     * Determines the extension used to select the converter.
     *
     * @param filePath the URL exactly as passed in by the caller
     * @param url      the parsed form of {@code filePath}
     * @return the text from the last {@code '.'} onwards (possibly not a Word extension),
     *         or an empty string when no dot is present
     */
    private static String detectExtension(String filePath, URL url) {
        String fromRawText = suffixFromLastDot(filePath);
        if (DOC_EXTENSION.equalsIgnoreCase(fromRawText)
                || DOCX_EXTENSION.equalsIgnoreCase(fromRawText)) {
            return fromRawText;
        }
        // Fall back to the path so "a.docx?token=..." is still recognised.
        return suffixFromLastDot(url.getPath());
    }

    /** Returns the part of {@code text} starting at its last {@code '.'}, or {@code ""} if none. */
    private static String suffixFromLastDot(String text) {
        int dot = text.lastIndexOf('.');
        return dot < 0 ? "" : text.substring(dot);
    }

    /**
     * Converts a legacy {@code .doc} file to an HTML string.
     *
     * @param file local copy of the document
     * @return the HTML produced by the transformer, decoded as UTF-8
     * @throws RuntimeException wrapping any failure raised while reading or converting
     */
    private static String convertDoc(File file) {
        try (FileInputStream inputStream = new FileInputStream(file);
             HWPFDocument hwpfDocument = new HWPFDocument(inputStream)) {
            // The converter writes its result into an initially empty DOM document.
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            wordToHtmlConverter.processDocument(hwpfDocument);
            Document document = wordToHtmlConverter.getDocument();

            // Serialise the DOM as indented, UTF-8 encoded HTML.
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.METHOD, "html");

            ByteArrayOutputStream outputtarget = new ByteArrayOutputStream();
            transformer.transform(new DOMSource(document), new StreamResult(outputtarget));
            return outputtarget.toString("UTF-8");
        } catch (Exception e) {
            throw new RuntimeException("Failed to convert .doc document to HTML", e);
        }
    }

    /**
     * Converts an OOXML {@code .docx} file to an HTML string.
     *
     * @param file local copy of the document
     * @return the HTML written by the converter, decoded as UTF-8
     * @throws RuntimeException wrapping any failure raised while reading or converting
     */
    private static String convertDocx(File file) {
        try (FileInputStream inputStream = new FileInputStream(file);
             XWPFDocument xwpfDocument = new XWPFDocument(inputStream)) {
            XHTMLOptions xhtmlOptions = XHTMLOptions.create();
            // NOTE(review): the original author's note says this makes styles inline
            // instead of being written to a <style> tag (default false) - confirm
            // against the XDocReport documentation.
            xhtmlOptions.setFragment(true);
            xhtmlOptions.setIgnoreStylesIfUnused(false);
            // Embed images as Base64 so the HTML needs no external image files.
            xhtmlOptions.setImageManager(new Base64EmbedImgManager());

            ByteArrayOutputStream outputtarget = new ByteArrayOutputStream();
            XHTMLConverter.getInstance().convert(xwpfDocument, outputtarget, xhtmlOptions);
            return outputtarget.toString("UTF-8");
        } catch (Exception e) {
            throw new RuntimeException("Failed to convert .docx document to HTML", e);
        }
    }
}