一、POM文件
<!-- Apache POI scratchpad module: supplies the org.apache.poi.hwpf classes (HWPFDocument, WordToHtmlConverter, Picture) imported by the converter code -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>5.2.1</version>
</dependency>
二、转换具体代码
package org.zhao.component;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
/**
 * Converts a binary Word ({@code .doc}) document to HTML, exporting the
 * pictures embedded in the document as well.
 *
 * <p>For an input {@code <dir>/<name>.doc} the converter produces
 * {@code <dir>/<name>/<name>.html} and, when the document contains pictures,
 * {@code <dir>/<name>/image/<picture files>}.
 *
 * @author Administrator
 * @date 2022-03-18
 */
public class WordComponent {

    /**
     * Returns {@code filename} without its last extension.
     *
     * <p>The extension is removed by position (everything from the last
     * {@code '.'} onwards), so upper-case extensions such as {@code .DOC} are
     * handled and an extension-like fragment in the middle of the name is left
     * untouched. A name without any dot is returned unchanged.
     *
     * @param filename simple file name (no directory part)
     * @return the name up to, but not including, the last dot
     */
    private static String stripExtension(String filename) {
        int dot = filename.lastIndexOf('.');
        return dot < 0 ? filename : filename.substring(0, dot);
    }

    /**
     * Creates the directory {@code dirPath} (including missing parents) if
     * nothing exists at that path yet, and prints the outcome of the attempt.
     *
     * @param dirPath path of the directory to create
     */
    private static void createFileDir(String dirPath) {
        File dir = new File(dirPath);
        if (!dir.exists()) {
            boolean created = dir.mkdirs();
            System.out.println(dirPath + "不存在,创建文件夹->" + created);
        }
    }

    /**
     * Writes every picture to {@code imagePath}, using the file name POI
     * suggests for it.
     *
     * <p>Export is best effort: a picture that cannot be written is reported
     * on stderr and the remaining pictures are still processed.
     *
     * @param pictures  pictures extracted from the document; may be null or empty
     * @param imagePath target directory path, ending with a separator
     */
    private static void savePictures(List<Picture> pictures, String imagePath) {
        if (CollectionUtils.isEmpty(pictures)) {
            return;
        }
        createFileDir(imagePath);
        for (Picture picture : pictures) {
            String target = imagePath + picture.suggestFullFileName();
            // try-with-resources so the stream is flushed and closed even on failure
            try (FileOutputStream imageOut = new FileOutputStream(target)) {
                picture.writeImageContent(imageOut);
            } catch (IOException e) {
                System.err.println(target + "->图片写入失败");
                e.printStackTrace();
            }
        }
    }

    /**
     * Converts the Word document at {@code docFilePath} to an HTML file.
     *
     * <p>If the file does not exist, a message is printed to stderr and the
     * method returns without doing anything. The output directory is named
     * after the document (extension removed); when that name is empty, equal
     * to the file name itself (no extension, which would collide with the
     * source file) or longer than 100 characters, the current time in
     * milliseconds is used instead.
     *
     * @param docFilePath path of the {@code .doc} file to convert
     * @throws Exception if the document cannot be read or converted, or the
     *                   HTML file cannot be written
     */
    public static void docToHtml(String docFilePath) throws Exception {
        File file = new File(docFilePath);
        if (!file.exists()) {
            System.err.println(docFilePath + "->文件不存在");
            return;
        }
        String name = file.getName();
        String dirName = stripExtension(name);
        if (dirName.isEmpty() || dirName.equals(name) || dirName.length() > 100) {
            dirName = String.valueOf(System.currentTimeMillis());
        }
        // Directory holding the HTML file. Resolved against the parent *File* so a
        // separator is always inserted and a missing parent (bare relative file
        // name) does not turn into the literal text "null".
        String htmlDirPath = new File(file.getParentFile(), dirName).getPath();
        createFileDir(htmlDirPath);
        // Directory holding the exported pictures
        String imagePath = htmlDirPath + "/image/";

        Document htmlDocument;
        try (FileInputStream in = new FileInputStream(file);
             HWPFDocument wordDocument = new HWPFDocument(in)) {
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            // The returned string becomes the <img src> value; the picture files
            // themselves are written by savePictures() under the same names.
            // NOTE(review): this is an absolute filesystem path, so the HTML is not
            // relocatable; consider returning "image/" + suggestedName instead.
            wordToHtmlConverter.setPicturesManager(
                    (content, pictureType, suggestedName, widthInches, heightInches) -> imagePath + suggestedName);
            wordToHtmlConverter.processDocument(wordDocument);
            // Pictures are exported before the document is closed
            savePictures(wordDocument.getPicturesTable().getAllPictures(), imagePath);
            htmlDocument = wordToHtmlConverter.getDocument();
        }

        File out = new File(htmlDirPath + "/" + dirName + ".html");
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        // Write through an explicit stream instead of StreamResult(File): the
        // File variant passes a percent-encoded URI to the transformer, which is
        // fragile for non-ASCII paths, and leaves closing the file to it.
        try (FileOutputStream htmlOut = new FileOutputStream(out)) {
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(htmlOut));
        }
        System.out.println("转换成功");
    }

    /**
     * Demo entry point: converts a sample document on drive D:.
     *
     * @param args unused
     * @throws Exception if the conversion fails
     */
    public static void main(String[] args) throws Exception {
        docToHtml("D:/我是一篇简历.doc");
    }
}
三、Word内容
四、转换后内容
代码块
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=utf-8">
<style type="text/css">.b1{white-space-collapsing:preserve;}
.b2{margin: 1.0in 0.7875in 0.7875in 1.0in;}
.s1{font-weight:bold;color:black;}
.s2{color:black;}
.s3{font-weight:bold;}
.p1{text-align:center;hyphenate:auto;font-family:华文新魏;font-size:36pt;}
.p2{text-align:center;hyphenate:auto;font-family:宋体;font-size:12pt;}
.p3{text-align:center;hyphenate:auto;font-family:楷体_GB2312;font-size:12pt;}
.p4{text-align:center;hyphenate:auto;font-family:Times New Roman;font-size:12pt;}
.p5{text-indent:0.16666667in;text-align:justify;hyphenate:auto;font-family:楷体_GB2312;font-size:12pt;}
.p6{text-align:justify;hyphenate:auto;font-family:楷体_GB2312;font-size:14pt;}
.p7{text-align:justify;hyphenate:auto;font-family:宋体;font-size:12pt;}
.p8{text-align:justify;hyphenate:auto;font-family:Times New Roman;font-size:12pt;}
.p9{text-indent:0.16666667in;text-align:justify;hyphenate:auto;font-family:Times New Roman;font-size:12pt;}
.p10{text-indent:0.3125in;text-align:justify;hyphenate:auto;font-family:Times New Roman;font-size:12pt;}
.p11{text-align:justify;hyphenate:auto;font-family:宋体;font-size:11pt;}
.p12{text-align:justify;hyphenate:auto;font-family:Times New Roman;font-size:10pt;}
.td1{width:0.75in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:1.500pt solid black;border-right:thin solid black;border-top:1.500pt solid black;}
.td2{width:1.0in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;border-top:1.500pt solid black;}
.td3{width:0.875in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;border-top:1.500pt solid black;}
.td4{width:1.25in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;border-top:1.500pt solid black;}
.td5{width:0.625in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;border-top:1.500pt solid black;}
.td6{width:1.125in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:1.0pt solid black;border-top:1.500pt solid black;}
.td7{width:1.125in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:1.0pt solid black;border-right:1.500pt solid black;border-top:1.500pt solid black;}
.td8{width:0.75in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:1.500pt solid black;border-right:thin solid black;border-top:thin solid black;}
.td9{width:1.0in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;border-top:thin solid black;}
.td10{width:0.875in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;}
.td11{width:1.25in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;border-top:thin solid black;}
.td12{width:0.625in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;}
.td13{width:1.125in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:1.0pt solid black;border-top:thin solid black;}
.td14{width:0.75in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:1.500pt solid black;border-right:thin solid black;border-top:thin solid black;}
.td15{width:1.0in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:thin solid black;border-right:thin solid black;border-top:thin solid black;}
.td16{width:0.875in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:thin solid black;border-right:thin solid black;border-top:thin solid black;}
.td17{width:3.0in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:thin solid black;border-right:1.0pt solid black;border-top:thin solid black;}
.td18{width:5.625in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:1.500pt solid black;border-right:1.0pt solid black;border-top:1.0pt solid black;}
.td19{width:0.75in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:1.500pt solid black;border-right:thin solid black;border-top:1.0pt solid black;}
.td20{width:6.0in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:thin solid black;border-right:1.500pt solid black;border-top:1.0pt solid black;}
.td21{width:6.75in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:1.500pt solid black;border-right:1.500pt solid black;border-top:1.0pt solid black;}
.td22{width:0.75in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.500pt solid black;border-left:1.500pt solid black;border-right:thin solid black;border-top:1.0pt solid black;}
.td23{width:6.0in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.500pt solid black;border-left:thin solid black;border-right:1.500pt solid black;border-top:1.0pt solid black;}
.r1{height:0.39375in;}
.r2{height:0.11180556in;}
.r3{height:0.90555555in;}
.r4{height:0.110416666in;keep-together:always;}
.r5{height:0.90555555in;keep-together:always;}
.r6{height:0.07847222in;keep-together:always;}
.r7{height:0.1388889in;}
.r8{height:0.16666667in;keep-together:always;}
.r9{height:1.18125in;keep-together:always;}
.r10{height:1.1416667in;}
.t1{table-layout:fixed;border-collapse:collapse;border-spacing:0;}
</style>
<title>个人简历表格</title>
<meta content="User" name="author">
</head>
<body class="b1 b2">
<p class="p1">
<span class="s1">个人简历表格</span>
</p>
<table class="t1">
<tbody>
<tr class="r1">
<td class="td1">
<p class="p2">
<span class="s1">姓 名</span>
</p>
</td><td class="td2">
<p class="p3"></p>
</td><td class="td3">
<p class="p2">
<span class="s1">出生年月</span>
</p>
</td><td class="td4">
<p class="p3"></p>
</td><td class="td5">
<p class="p4">
<span class="s1">民族</span>
</p>
</td><td class="td6">
<p class="p3"></p>
</td><td class="td7" rowspan="2">
<p class="p4">
<img src="D:\%E6%88%91%E6%98%AF%E4%B8%80%E7%AF%87%E7%AE%80%E5%8E%86/image/0.png" style="width:0.9677889in;height:0.9677889in;vertical-align:text-bottom;">
</p>
</td>
</tr>
<tr class="r1">
<td class="td8">
<p class="p2">
<span class="s1">籍 贯</span>
</p>
</td><td class="td9">
<p class="p3"></p>
</td><td class="td10">
<p class="p2">
<span class="s1">毕业时间</span>
</p>
</td><td class="td11">
<p class="p3"></p>
</td><td class="td12">
<p class="p4">
<span class="s1">学历</span>
</p>
</td><td class="td13">
<p class="p5">
<span class="s2"> </span>
</p>
</td>
</tr>
<tr class="r1">
<td class="td14">
<p class="p2">
<span class="s1">性 别</span>
</p>
</td><td class="td15">
<p class="p3"></p>
</td><td class="td16">
<p class="p2">
<span class="s1">专 业</span>
</p>
</td><td class="td17" colspan="3">
<p class="p3"></p>
</td>
</tr>
<tr class="r2">
<td class="td18" colspan="6">
<p class="p4"></p>
</td>
</tr>
<tr class="r3">
<td class="td19">
<p class="p4">
<span class="s1">求</span>
</p>
<p class="p4">
<span class="s1">职</span>
</p>
<p class="p4">
<span class="s1">意</span>
</p>
<p class="p4">
<span class="s1">向</span>
</p>
</td><td class="td20" colspan="6">
<p class="p6"></p>
</td>
</tr>
<tr class="r4">
<td class="td21" colspan="7">
<p class="p7"></p>
</td>
</tr>
<tr class="r5">
<td class="td19">
<p class="p2">
<span class="s3">兴</span>
</p>
<p class="p2">
<span class="s3">趣</span>
</p>
<p class="p2">
<span class="s3">爱</span>
</p>
<p class="p2">
<span class="s3">好</span>
</p>
</td><td class="td20" colspan="6">
<p class="p8"></p>
</td>
</tr>
<tr class="r6">
<td class="td21" colspan="7">
<p class="p9"></p>
</td>
</tr>
<tr class="r5">
<td class="td19">
<p class="p4">
<span class="s3">社会</span>
</p>
<p class="p4">
<span class="s3">实践</span>
</p>
<p class="p4">
<span class="s3">经验</span>
</p>
</td><td class="td20" colspan="6">
<p class="p7"></p>
</td>
</tr>
<tr class="r7">
<td class="td21" colspan="7">
<p class="p10"></p>
</td>
</tr>
<tr class="r3">
<td class="td19">
<p class="p2">
<span class="s3">在校期间担任过何种职务</span>
</p>
</td><td class="td20" colspan="6">
<p class="p6"></p>
</td>
</tr>
<tr class="r8">
<td class="td21" colspan="7">
<p class="p9"></p>
</td>
</tr>
<tr class="r9">
<td class="td19">
<p class="p4">
<span class="s1">计</span>
</p>
<p class="p4">
<span class="s1">算</span>
</p>
<p class="p4">
<span class="s1">机</span>
</p>
<p class="p4">
<span class="s1">水</span>
</p>
<p class="p4">
<span class="s1">平</span>
</p>
</td><td class="td20" colspan="6">
<p class="p6"></p>
</td>
</tr>
<tr class="r7">
<td class="td21" colspan="7">
<p class="p7"></p>
</td>
</tr>
<tr class="r10">
<td class="td22">
<p class="p4">
<span class="s1">自</span>
</p>
<p class="p4">
<span class="s1">我</span>
</p>
<p class="p4">
<span class="s1">评</span>
</p>
<p class="p4">
<span class="s1">价</span>
</p>
</td><td class="td23" colspan="6">
<p class="p11"></p>
</td>
</tr>
</tbody>
</table>
<p class="p12"></p>
</body>
</html>