首先在 pom.xml 中引入依赖:
版本号(放在 pom.xml 的 <properties> 节点内):
<poi-scratchpad.version>3.14</poi-scratchpad.version>
<poi-ooxml.version>3.14</poi-ooxml.version>
<xdocreport.version>1.0.6</xdocreport.version>
<poi-ooxml-schemas.version>3.14</poi-ooxml-schemas.version>
<ooxml-schemas.version>1.3</ooxml-schemas.version>
<jsoup.version>1.11.3</jsoup.version>
<!-- word ===> HTML-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>${poi-scratchpad.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>${poi-ooxml.version}</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>xdocreport</artifactId>
<version>${xdocreport.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>${poi-ooxml-schemas.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>${ooxml-schemas.version}</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>${jsoup.version}</version>
</dependency>
工具类(Word2Html.java):
package com.tencent.tusi.common.utils;
import com.tencent.tusi.common.utils.minio.MinioUtil;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.xwpf.converter.core.IImageExtractor;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import static com.tencent.tusi.common.constant.PublicConstant.*;
/**
 * Converts Word documents to HTML and uploads the generated HTML through {@code MinioUtil}.
 *
 * <p>Binary documents are handled with POI's {@code WordToHtmlConverter}; OOXML documents are
 * handled with the XHTML converter. Each conversion writes the HTML to a temporary local file
 * (named by {@code Tools.getRandomHTMLFileName()}), uploads it, and then removes the local file.
 */
public class Word2Html {

    /** Document type declaration prepended to the XHTML fragment produced for OOXML input. */
    private static final String HTML_DOCTYPE = "<!DOCTYPE html>";

    /**
     * Converts a Word document to HTML and uploads the result.
     *
     * <p>The bytes are first parsed as a binary Word document. If POI reports that the content
     * is actually OOXML ({@link OfficeXmlFileException}), the same bytes are converted with
     * {@link #Word2007ToHtml(InputStream)} instead.
     *
     * @param inputStream the raw bytes of the Word document (despite the name, this is a byte
     *                    array, not a stream)
     * @return the value returned by the upload helper for the generated HTML file
     * @throws Exception if parsing, conversion, or (for binary documents) the upload fails
     */
    public static String Word2Html(byte[] inputStream) throws Exception {
        HWPFDocument hwpfDocument;
        try {
            // HWPFDocument only parses the binary format (Office 2003 and earlier).
            hwpfDocument = new HWPFDocument(new ByteArrayInputStream(inputStream));
        } catch (OfficeXmlFileException e) {
            // The content is OOXML: re-read the same bytes with the 2007+ converter.
            return Word2007ToHtml(new ByteArrayInputStream(inputStream));
        }
        return Word2003ToHtml(hwpfDocument);
    }

    /**
     * Converts an already parsed binary Word document to HTML and uploads it.
     *
     * <p>No picture manager is configured, so embedded images are not handled here
     * (the original author notes that the contracts contain no images).
     *
     * @param wordDocument the parsed binary Word document
     * @return the value returned by {@code MinioUtil.uploadFileByInputStream}
     * @throws Exception if the conversion, serialization, or upload fails
     */
    private static String Word2003ToHtml(HWPFDocument wordDocument) throws Exception {
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        // Build the HTML DOM from the Word document.
        wordToHtmlConverter.processDocument(wordDocument);
        Document htmlDocument = wordToHtmlConverter.getDocument();

        File htmlFile = new File(Tools.getRandomHTMLFileName());
        try {
            // Serialize the DOM as UTF-8 HTML; the stream is closed even if transform() fails.
            try (OutputStream outStream = new FileOutputStream(htmlFile)) {
                Transformer serializer = TransformerFactory.newInstance().newTransformer();
                serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
                serializer.setOutputProperty(OutputKeys.INDENT, "yes");
                serializer.setOutputProperty(OutputKeys.METHOD, "html");
                serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
            }
            // Upload the generated file to the file server.
            try (InputStream uploadStream = Tools.file2InputStream(htmlFile)) {
                return MinioUtil.uploadFileByInputStream(htmlFile, uploadStream, CONTRACT, TEMPLATE_DATA);
            }
        } finally {
            // The local file is only a staging copy; never leave it behind.
            deleteTempFile(htmlFile);
        }
    }

    /**
     * Converts an OOXML Word document to an HTML file and uploads it.
     *
     * <p>Every embedded image is uploaded with {@code MinioUtil.uploadImgSimple}, and the value
     * that helper returns is used as the image reference in the generated HTML. The converter
     * output is an XHTML fragment, to which {@code <!DOCTYPE html>} is prepended.
     *
     * @param input a stream positioned at the start of the OOXML document; it is not closed here
     * @return the value returned by {@code MinioUtil.uploadFileByInputStream}, or an empty string
     *         if that upload throws
     * @throws IOException if the document cannot be read or converted, or the temporary file
     *                     cannot be written
     */
    public static String Word2007ToHtml(InputStream input)
            throws IOException {
        XWPFDocument document = new XWPFDocument(input);

        // Configure the XHTML conversion: images are uploaded as they are extracted, and the
        // resolver maps each image path to whatever the upload returned for it.
        XHTMLOptions options = XHTMLOptions.create();
        final Map<String, String> imgMap = new HashMap<>();
        options.setExtractor(new IImageExtractor() {
            @Override
            public void extract(String imagePath, byte[] imageData) throws IOException {
                // Only the suffix (e.g. ".png") is passed on; tolerate paths without one
                // instead of failing with StringIndexOutOfBoundsException.
                int dot = imagePath.lastIndexOf('.');
                String suffix = dot >= 0 ? imagePath.substring(dot) : "";
                String url = MinioUtil.uploadImgSimple(imageData, suffix, CONTRACT, TEMPLATE_DATA);
                imgMap.put(imagePath, url);
            }
        });
        options.URIResolver(new IURIResolver() {
            @Override
            public String resolve(String uri) {
                // Image reference written into the HTML.
                return imgMap.get(uri);
            }
        });
        options.setIgnoreStylesIfUnused(false);
        options.setFragment(true);

        // Convert the document to XHTML in memory.
        ByteArrayOutputStream xhtml = new ByteArrayOutputStream();
        XHTMLConverter.getInstance().convert(document, xhtml, options);

        File htmlFile = new File(Tools.getRandomHTMLFileName());
        try {
            // Write the doctype followed by the complete converter output.
            try (OutputStream out = new FileOutputStream(htmlFile)) {
                out.write(HTML_DOCTYPE.getBytes(StandardCharsets.UTF_8));
                xhtml.writeTo(out);
            }
            try (InputStream uploadStream = Tools.file2InputStream(htmlFile)) {
                try {
                    return MinioUtil.uploadFileByInputStream(htmlFile, uploadStream, CONTRACT, TEMPLATE_DATA);
                } catch (Exception e) {
                    // Existing contract: an upload failure is reported to callers as an
                    // empty result rather than as an exception.
                    e.printStackTrace();
                    return "";
                }
            }
        } finally {
            deleteTempFile(htmlFile);
        }
    }

    /** Removes a temporary file, falling back to delete-on-exit if it cannot be removed now. */
    private static void deleteTempFile(File file) {
        if (file.exists() && !file.delete()) {
            file.deleteOnExit();
        }
    }
}