Java 利用 POI scratchpad 5.2.1 将 Word 文档(.doc 格式)转换为 HTML 格式(含文档中的图片)

一、POM文件

 <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>5.2.1</version>
 </dependency>

二、转换具体代码

package org.zhao.component;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;

/**
 * Converts a legacy Word document ({@code .doc}) to HTML, exporting the pictures it contains.
 *
 * <p>For an input {@code <parent>/<name>.doc} the converter creates the directory
 * {@code <parent>/<name>/}, writes {@code <name>.html} into it and stores the document's
 * pictures under {@code <parent>/<name>/image/}. The generated HTML references the pictures
 * with a path relative to the HTML file, so the output directory can be moved or served
 * as a unit.
 *
 * @author Administrator
 * @date 2022-03-18
 */
public class WordComponent {

    /** Name of the picture sub-directory, relative to the directory holding the HTML file. */
    private static final String IMAGE_DIR_NAME = "image";

    /** Base names longer than this are replaced by a timestamp when naming the output directory. */
    private static final int MAX_DIR_NAME_LENGTH = 100;

    /**
     * Returns {@code filename} without its last extension.
     *
     * @param filename a plain file name (no directory part)
     * @return the text before the last {@code '.'}, or the unchanged name when it has no dot
     */
    private static String stripExtension(String filename) {
        int dot = filename.lastIndexOf('.');
        return dot < 0 ? filename : filename.substring(0, dot);
    }

    /**
     * Makes sure {@code dir} exists as a directory, creating missing parents as needed.
     *
     * @param dir the directory to create
     * @throws IOException if the directory does not exist and cannot be created
     */
    private static void createFileDir(File dir) throws IOException {
        if (dir.isDirectory()) {
            return;
        }
        boolean created = dir.mkdirs();
        System.out.println(dir.getPath() + "不存在,创建文件夹->" + created);
        // Fail here with a clear message instead of letting a later write fail obscurely.
        if (!created && !dir.isDirectory()) {
            throw new IOException("Unable to create directory: " + dir.getAbsolutePath());
        }
    }

    /**
     * Writes every picture into {@code imageDir}, using the file name POI suggests for it.
     *
     * <p>Writing is best-effort per picture: a failure on one picture is reported on
     * {@code System.err} and the remaining pictures are still written.
     *
     * @param pictures the pictures to export; may be {@code null} or empty
     * @param imageDir the target directory; created only when there is something to write
     * @throws IOException if {@code imageDir} cannot be created
     */
    private static void writePictures(List<Picture> pictures, File imageDir) throws IOException {
        if (CollectionUtils.isEmpty(pictures)) {
            return;
        }
        createFileDir(imageDir);
        for (Picture picture : pictures) {
            File target = new File(imageDir, picture.suggestFullFileName());
            // try-with-resources guarantees the stream is closed even when the write fails.
            try (FileOutputStream out = new FileOutputStream(target)) {
                picture.writeImageContent(out);
            } catch (IOException e) {
                System.err.println("Failed to write picture: " + target.getAbsolutePath());
                e.printStackTrace();
            }
        }
    }

    /**
     * Converts the {@code .doc} file at {@code docFilePath} to an HTML file.
     *
     * <p>The output goes to a directory next to the source file, named after the file without
     * its extension (or after the current timestamp when that name exceeds
     * {@value #MAX_DIR_NAME_LENGTH} characters). If the source file does not exist a message
     * is printed to {@code System.err} and nothing is written.
     *
     * @param docFilePath path of the Word {@code .doc} file to convert
     * @throws Exception if the document cannot be read, converted, or the output cannot be written
     */
    public static void docToHtml(String docFilePath) throws Exception {
        File file = new File(docFilePath);
        if (!file.exists()) {
            System.err.println(docFilePath + "->文件不存在");
            return;
        }
        String dirName = stripExtension(file.getName());
        if (dirName.length() > MAX_DIR_NAME_LENGTH) {
            dirName = String.valueOf(System.currentTimeMillis());
        }
        // Resolve against the absolute parent so that relative input paths work, and let File
        // insert the path separator instead of concatenating strings.
        File htmlDir = new File(file.getAbsoluteFile().getParentFile(), dirName);
        createFileDir(htmlDir);
        File imageDir = new File(htmlDir, IMAGE_DIR_NAME);

        Document htmlDocument;
        try (FileInputStream in = new FileInputStream(file);
             HWPFDocument wordDocument = new HWPFDocument(in)) {
            WordToHtmlConverter converter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            // The returned string becomes the <img src>; keep it relative to the HTML file
            // rather than embedding an absolute filesystem path.
            converter.setPicturesManager(
                    (content, pictureType, suggestedName, widthInches, heightInches) ->
                            IMAGE_DIR_NAME + "/" + suggestedName);
            converter.processDocument(wordDocument);
            writePictures(wordDocument.getPicturesTable().getAllPictures(), imageDir);
            htmlDocument = converter.getDocument();
        }

        File htmlFile = new File(htmlDir, dirName + ".html");
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        // Write through an explicit stream: StreamResult(File) goes through a system-id URI,
        // which is fragile for paths containing non-ASCII characters or spaces.
        try (FileOutputStream htmlOut = new FileOutputStream(htmlFile)) {
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(htmlOut));
        }
        System.out.println("转换成功");
    }


    /**
     * Demo entry point: converts a sample document on drive D:.
     *
     * @param args unused
     * @throws Exception if the conversion fails
     */
    public static void main(String[] args) throws Exception {
        docToHtml("D:/我是一篇简历.doc");
    }
}

 三、Word内容

 

四、转换后内容

 

        代码块

<html>
    <head>
        <META http-equiv="Content-Type" content="text/html; charset=utf-8">
        <style type="text/css">.b1{white-space-collapsing:preserve;}
.b2{margin: 1.0in 0.7875in 0.7875in 1.0in;}
.s1{font-weight:bold;color:black;}
.s2{color:black;}
.s3{font-weight:bold;}
.p1{text-align:center;hyphenate:auto;font-family:华文新魏;font-size:36pt;}
.p2{text-align:center;hyphenate:auto;font-family:宋体;font-size:12pt;}
.p3{text-align:center;hyphenate:auto;font-family:楷体_GB2312;font-size:12pt;}
.p4{text-align:center;hyphenate:auto;font-family:Times New Roman;font-size:12pt;}
.p5{text-indent:0.16666667in;text-align:justify;hyphenate:auto;font-family:楷体_GB2312;font-size:12pt;}
.p6{text-align:justify;hyphenate:auto;font-family:楷体_GB2312;font-size:14pt;}
.p7{text-align:justify;hyphenate:auto;font-family:宋体;font-size:12pt;}
.p8{text-align:justify;hyphenate:auto;font-family:Times New Roman;font-size:12pt;}
.p9{text-indent:0.16666667in;text-align:justify;hyphenate:auto;font-family:Times New Roman;font-size:12pt;}
.p10{text-indent:0.3125in;text-align:justify;hyphenate:auto;font-family:Times New Roman;font-size:12pt;}
.p11{text-align:justify;hyphenate:auto;font-family:宋体;font-size:11pt;}
.p12{text-align:justify;hyphenate:auto;font-family:Times New Roman;font-size:10pt;}
.td1{width:0.75in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:1.500pt solid black;border-right:thin solid black;border-top:1.500pt solid black;}
.td2{width:1.0in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;border-top:1.500pt solid black;}
.td3{width:0.875in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;border-top:1.500pt solid black;}
.td4{width:1.25in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;border-top:1.500pt solid black;}
.td5{width:0.625in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;border-top:1.500pt solid black;}
.td6{width:1.125in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:1.0pt solid black;border-top:1.500pt solid black;}
.td7{width:1.125in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:1.0pt solid black;border-right:1.500pt solid black;border-top:1.500pt solid black;}
.td8{width:0.75in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:1.500pt solid black;border-right:thin solid black;border-top:thin solid black;}
.td9{width:1.0in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;border-top:thin solid black;}
.td10{width:0.875in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;}
.td11{width:1.25in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;border-top:thin solid black;}
.td12{width:0.625in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:thin solid black;}
.td13{width:1.125in;padding-start:0.0in;padding-end:0.0in;border-bottom:thin solid black;border-left:thin solid black;border-right:1.0pt solid black;border-top:thin solid black;}
.td14{width:0.75in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:1.500pt solid black;border-right:thin solid black;border-top:thin solid black;}
.td15{width:1.0in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:thin solid black;border-right:thin solid black;border-top:thin solid black;}
.td16{width:0.875in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:thin solid black;border-right:thin solid black;border-top:thin solid black;}
.td17{width:3.0in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:thin solid black;border-right:1.0pt solid black;border-top:thin solid black;}
.td18{width:5.625in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:1.500pt solid black;border-right:1.0pt solid black;border-top:1.0pt solid black;}
.td19{width:0.75in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:1.500pt solid black;border-right:thin solid black;border-top:1.0pt solid black;}
.td20{width:6.0in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:thin solid black;border-right:1.500pt solid black;border-top:1.0pt solid black;}
.td21{width:6.75in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.0pt solid black;border-left:1.500pt solid black;border-right:1.500pt solid black;border-top:1.0pt solid black;}
.td22{width:0.75in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.500pt solid black;border-left:1.500pt solid black;border-right:thin solid black;border-top:1.0pt solid black;}
.td23{width:6.0in;padding-start:0.0in;padding-end:0.0in;border-bottom:1.500pt solid black;border-left:thin solid black;border-right:1.500pt solid black;border-top:1.0pt solid black;}
.r1{height:0.39375in;}
.r2{height:0.11180556in;}
.r3{height:0.90555555in;}
.r4{height:0.110416666in;keep-together:always;}
.r5{height:0.90555555in;keep-together:always;}
.r6{height:0.07847222in;keep-together:always;}
.r7{height:0.1388889in;}
.r8{height:0.16666667in;keep-together:always;}
.r9{height:1.18125in;keep-together:always;}
.r10{height:1.1416667in;}
.t1{table-layout:fixed;border-collapse:collapse;border-spacing:0;}
</style>
        <title>个人简历表格</title>
        <meta content="User" name="author">
    </head>
    <body class="b1 b2">
        <p class="p1">
            <span class="s1">个人简历表格</span>
        </p>
        <table class="t1">
            <tbody>
                <tr class="r1">
                    <td class="td1">
                        <p class="p2">
                            <span class="s1">姓 名</span>
                        </p>
                    </td><td class="td2">
                        <p class="p3"></p>
                    </td><td class="td3">
                        <p class="p2">
                            <span class="s1">出生年月</span>
                        </p>
                    </td><td class="td4">
                        <p class="p3"></p>
                    </td><td class="td5">
                        <p class="p4">
                            <span class="s1">民族</span>
                        </p>
                    </td><td class="td6">
                        <p class="p3"></p>
                    </td><td class="td7" rowspan="2">
                        <p class="p4">
                            <img src="D:\%E6%88%91%E6%98%AF%E4%B8%80%E7%AF%87%E7%AE%80%E5%8E%86/image/0.png" style="width:0.9677889in;height:0.9677889in;vertical-align:text-bottom;">
                        </p>
                    </td>
                </tr>
                <tr class="r1">
                    <td class="td8">
                        <p class="p2">
                            <span class="s1">籍 贯</span>
                        </p>
                    </td><td class="td9">
                        <p class="p3"></p>
                    </td><td class="td10">
                        <p class="p2">
                            <span class="s1">毕业时间</span>
                        </p>
                    </td><td class="td11">
                        <p class="p3"></p>
                    </td><td class="td12">
                        <p class="p4">
                            <span class="s1">学历</span>
                        </p>
                    </td><td class="td13">
                        <p class="p5">
                            <span class="s2">    </span>
                        </p>
                    </td>
                </tr>
                <tr class="r1">
                    <td class="td14">
                        <p class="p2">
                            <span class="s1">性 别</span>
                        </p>
                    </td><td class="td15">
                        <p class="p3"></p>
                    </td><td class="td16">
                        <p class="p2">
                            <span class="s1">专 业</span>
                        </p>
                    </td><td class="td17" colspan="3">
                        <p class="p3"></p>
                    </td>
                </tr>
                <tr class="r2">
                    <td class="td18" colspan="6">
                        <p class="p4"></p>
                    </td>
                </tr>
                <tr class="r3">
                    <td class="td19">
                        <p class="p4">
                            <span class="s1">求</span>
                        </p>
                        <p class="p4">
                            <span class="s1">职</span>
                        </p>
                        <p class="p4">
                            <span class="s1">意</span>
                        </p>
                        <p class="p4">
                            <span class="s1">向</span>
                        </p>
                    </td><td class="td20" colspan="6">
                        <p class="p6"></p>
                    </td>
                </tr>
                <tr class="r4">
                    <td class="td21" colspan="7">
                        <p class="p7"></p>
                    </td>
                </tr>
                <tr class="r5">
                    <td class="td19">
                        <p class="p2">
                            <span class="s3">兴</span>
                        </p>
                        <p class="p2">
                            <span class="s3">趣</span>
                        </p>
                        <p class="p2">
                            <span class="s3">爱</span>
                        </p>
                        <p class="p2">
                            <span class="s3">好</span>
                        </p>
                    </td><td class="td20" colspan="6">
                        <p class="p8"></p>
                    </td>
                </tr>
                <tr class="r6">
                    <td class="td21" colspan="7">
                        <p class="p9"></p>
                    </td>
                </tr>
                <tr class="r5">
                    <td class="td19">
                        <p class="p4">
                            <span class="s3">社会</span>
                        </p>
                        <p class="p4">
                            <span class="s3">实践</span>
                        </p>
                        <p class="p4">
                            <span class="s3">经验</span>
                        </p>
                    </td><td class="td20" colspan="6">
                        <p class="p7"></p>
                    </td>
                </tr>
                <tr class="r7">
                    <td class="td21" colspan="7">
                        <p class="p10"></p>
                    </td>
                </tr>
                <tr class="r3">
                    <td class="td19">
                        <p class="p2">
                            <span class="s3">在校期间担任过何种职务</span>
                        </p>
                    </td><td class="td20" colspan="6">
                        <p class="p6"></p>
                    </td>
                </tr>
                <tr class="r8">
                    <td class="td21" colspan="7">
                        <p class="p9"></p>
                    </td>
                </tr>
                <tr class="r9">
                    <td class="td19">
                        <p class="p4">
                            <span class="s1">计</span>
                        </p>
                        <p class="p4">
                            <span class="s1">算</span>
                        </p>
                        <p class="p4">
                            <span class="s1">机</span>
                        </p>
                        <p class="p4">
                            <span class="s1">水</span>
                        </p>
                        <p class="p4">
                            <span class="s1">平</span>
                        </p>
                    </td><td class="td20" colspan="6">
                        <p class="p6"></p>
                    </td>
                </tr>
                <tr class="r7">
                    <td class="td21" colspan="7">
                        <p class="p7"></p>
                    </td>
                </tr>
                <tr class="r10">
                    <td class="td22">
                        <p class="p4">
                            <span class="s1">自</span>
                        </p>
                        <p class="p4">
                            <span class="s1">我</span>
                        </p>
                        <p class="p4">
                            <span class="s1">评</span>
                        </p>
                        <p class="p4">
                            <span class="s1">价</span>
                        </p>
                    </td><td class="td23" colspan="6">
                        <p class="p11"></p>
                    </td>
                </tr>
            </tbody>
        </table>
        <p class="p12"></p>
    </body>
</html>

 

 

 

注:下文中的 *** 代表文件名中的组件名称。 # 包: 中文-英文对照文档:【***-javadoc-API文档-中文(简体)-英语-对照版.zip】 jar包下载地址:【***.jar下载地址(官方地址+国内镜像地址).txt】 Maven依赖:【***.jar Maven依赖信息(可用于项目pom.xml).txt】 Gradle依赖:【***.jar Gradle依赖信息(可用于项目build.gradle).txt】 源代码下载地址:【***-sources.jar下载地址(官方地址+国内镜像地址).txt】 # 本文件关键字: 中文-英文对照文档,中英对照文档,java,jar包,Maven,第三方jar包,组件,开源组件,第三方组件,Gradle,中文API文档,手册,开发手册,使用手册,参考手册 # 使用方法: 解压 【***.jar中文文档.zip】,再解压其中的 【***-javadoc-API文档-中文(简体)版.zip】,双击 【index.html】 文件,即可用浏览器打开、进行查看。 # 特殊说明: ·本文档为人性化翻译,精心制作,请放心使用。 ·本文档为双语同时展示,一行原文、一行译文,可逐行对照,避免了原文/译文来回切换的麻烦; ·有原文可参照,不再担心翻译偏差误导; ·边学技术、边学英语。 ·只翻译了该翻译的内容,如:注释、说明、描述、用法讲解 等; ·不该翻译的内容保持原样,如:类名、方法名、包名、类型、关键字、代码 等。 # 温馨提示: (1)为了防止解压后路径太长导致浏览器无法打开,推荐在解压时选择“解压到当前文件夹”(放心,自带文件夹,文件不会散落一地); (2)有时,一套Java组件会有多个jar,所以在下载前,请仔细阅读本篇描述,以确保这就是你需要的文件;
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

网络达人丶

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值