将word转化为html,并读取html内容

/**
     * doc转为html
     * @param sourceFileName    源文件路径
     * @param htmlPath  目标文件路径
     * @return
     * @throws Exception
     */
    public static boolean docToHtml(String  sourceFileName, String htmlPath) throws Exception {
        HWPFDocumentCore wordDocument =  WordToHtmlUtils.loadDoc(new FileInputStream(sourceFileName));
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.processDocument(wordDocument);
        Document htmlDocument = wordToHtmlConverter.getDocument();
        OutputStream out = new FileOutputStream(new File(htmlPath));
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(out);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer  = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);
        out.close();
        return true;
    }

    /**
     * 将docx转为html
     * @param sourceFileName 源文件路径
     * @param fileUrl 目标文件路径
     * @return
     * @throws Exception
     */
    public static boolean docxToHtml(String sourceFileName,String fileUrl) throws Exception{

         File wordFile = new File(sourceFileName).getAbsoluteFile(), htmlFile = new File(fileUrl);
        try {
             InputStream inputStream = new FileInputStream(wordFile);// 输入流
             XWPFDocument document = new XWPFDocument(inputStream);// 读取word文档
            inputStream.close();// 关闭输入流
             XHTMLOptions options = XHTMLOptions.create();// 创建选项
//            options.setImageManager(new ImageManager(wordFile.getParentFile(), "PoiImages"));// 设置图片文件夹保存的路径以及文件夹名称
             OutputStream outputStream = new FileOutputStream(htmlFile);// 输出流
            XHTMLConverter.getInstance().convert(document, outputStream, options);// word文档转html
            outputStream.close();// 关闭输出流
            document.close();// 关闭文档
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }


依赖
  <!--操作日志-->
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.2</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <!--操作word-->
        <dependency>
            <groupId>commons-codec</groupId>
            <artifactId>commons-codec</artifactId>
            <version>1.10</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-collections4</artifactId>
            <version>4.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-examples</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-excelant</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.xmlbeans</groupId>
            <artifactId>xmlbeans</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>ooxml-schemas</artifactId>
            <version>1.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.poi.xwpf.converter.core</artifactId>
            <version>2.0.1</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
            <version>2.0.1</version>
        </dependency>
        <!--操作文件-->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.5</version>
        </dependency>

 

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值