引入依赖
<!-- Apache POI core library -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
<exclusions>
<!-- NOTE(review): "poi-xml" does not look like a published Apache POI artifact; confirm this exclusion is actually needed -->
<exclusion>
<groupId>org.apache.poi</groupId>
<artifactId>poi-xml</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Library for Word 2007 and later (.docx) -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<!-- Library for Word 2003 (.doc) -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.2</version>
</dependency>
<!-- XWPF-to-XHTML converter used for .docx documents -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
<version>2.0.2</version>
</dependency>
<dependency>
<groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId>
<version>2.12.0</version>
</dependency>
OfficeConvertUtil 工具类
import cn.hutool.core.img.ImgUtil;
import fr.opensagres.poi.xwpf.converter.xhtml.Base64EmbedImgManager;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
/**
 * Static helpers that render Word documents as a single HTML string, with pictures
 * embedded inline as base64 data instead of being written to separate files.
 */
public class OfficeConvertUtil {

    /**
     * Converts a Word 2003 ({@code .doc}) document to HTML.
     *
     * <p>Each picture is embedded as a {@code data:} URI built from its raw bytes and the
     * MIME type reported by POI, so the result is one self-contained page.
     *
     * @param wordDocument the already-opened document; this method does not close it
     * @return the HTML markup
     * @throws TransformerException if the generated DOM cannot be serialized
     * @throws ParserConfigurationException if no DOM document builder can be created
     */
    public static String word2003ToHtml(HWPFDocument wordDocument) throws TransformerException,
            ParserConfigurationException {
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

        // Encode the original picture bytes directly: decoding and re-encoding the image
        // would be lossy and would depend on the picture being in a decodable format.
        wordToHtmlConverter.setPicturesManager(
                (content, pictureType, suggestedName, widthInches, heightInches) ->
                        "data:" + pictureType.getMime() + ";base64,"
                                + Base64.getEncoder().encodeToString(content));

        // Parse the Word document into an HTML DOM.
        wordToHtmlConverter.processDocument(wordDocument);
        Document htmlDocument = wordToHtmlConverter.getDocument();

        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(outputStream));

        // The bytes were written as UTF-8 above, so decode them as UTF-8 rather than with
        // the platform default charset.
        return new String(outputStream.toByteArray(), StandardCharsets.UTF_8);
    }

    /**
     * Converts a Word 2007+ ({@code .docx}) document to XHTML.
     *
     * <p>Pictures are embedded as base64 through {@link Base64EmbedImgManager}.
     *
     * @param document the already-opened document; this method does not close it
     * @return the XHTML markup, indented by 4
     * @throws IOException if the converter fails to write its output
     */
    public static String word2007ToHtml(XWPFDocument document)
            throws IOException {
        XHTMLOptions options = XHTMLOptions.create()
                .indent(4)
                .setImageManager(new Base64EmbedImgManager());

        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        XHTMLConverter.getInstance().convert(document, outputStream, options);

        // NOTE(review): this decodes with the platform default charset; confirm it matches
        // the charset XHTMLConverter uses when writing to the stream.
        return outputStream.toString();
    }
}
输出内容
String content;
// try-with-resources closes the stream and the document even when conversion throws.
try (FileInputStream fis = new FileInputStream(file)) {
    String path = param.getPath();
    // Case-insensitive suffix check, so that ".DOCX" is not routed to the .doc parser.
    boolean isDocx = path.regionMatches(true, path.length() - ".docx".length(), ".docx", 0, ".docx".length());
    if (isDocx) {
        try (XWPFDocument document = new XWPFDocument(fis)) {
            content = OfficeConvertUtil.word2007ToHtml(document);
        }
    } else {
        try (HWPFDocument wordDocument = new HWPFDocument(fis)) {
            content = OfficeConvertUtil.word2003ToHtml(wordDocument);
        }
    }
}
// Literal replacement; no regular expression is needed for a fixed "\n".
content = content.replace("\n", "<br/>");