minio文档(doc、docx)转html

使用前需先在 pom.xml 中引入以下 Maven 依赖:

        <!-- 注意:poi、poi-ooxml、poi-scratchpad 三个依赖的版本必须保持一致 -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>4.1.2</version>
        </dependency>
        <!-- 操作doc ppt xls  -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>4.1.2</version>
        </dependency>
        <!-- 操作docx pptx xlsx  -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>4.1.2</version>
        </dependency>

        <!--httpclient依赖-->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.3</version>
        </dependency>
        <dependency>
            <groupId>commons-httpclient</groupId>
            <artifactId>commons-httpclient</artifactId>
            <version>3.1</version>
        </dependency>

        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
            <version>2.0.2</version>
        </dependency>
package com.openness.config;

import fr.opensagres.poi.xwpf.converter.xhtml.Base64EmbedImgManager;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.net.URL;
import java.net.URLConnection;

/**
 * Converts Word documents ({@code .doc} / {@code .docx}) that are reachable through a URL
 * (for example an object-storage download link) into an HTML string.
 *
 * <p>The document is first downloaded to a temporary file, converted, and the temporary
 * file is removed again before the method returns.
 */
public class WordToHtmlUtil {

    /** Value returned by {@link #getWordToHtml(String)} for anything that is not .doc/.docx. */
    private static final String UNSUPPORTED_FORMAT = "文件格式错误";

    private static final String DOC_EXTENSION = ".doc";
    private static final String DOCX_EXTENSION = ".docx";

    /**
     * Downloads the Word document at {@code filePath} and converts it to HTML.
     *
     * @param filePath absolute URL of the document; the extension is taken from the URL path
     *                 (so query strings such as presigned-URL parameters are ignored) and,
     *                 failing that, from the text after the last dot of the whole string
     * @return the generated HTML, or the literal {@code "文件格式错误"} when the extension is
     *         neither {@code .doc} nor {@code .docx} (compared case-insensitively)
     * @throws java.net.MalformedURLException if {@code filePath} is not a valid URL
     * @throws IOException      if the document cannot be downloaded
     * @throws RuntimeException if the downloaded document cannot be converted; the original
     *                          failure is preserved as the cause
     */
    public static String getWordToHtml(String filePath) throws IOException {
        URL url = new URL(filePath);

        // Decide on the format BEFORE downloading so unsupported files cost no I/O
        // and leave no temporary file behind.
        String extension = resolveExtension(url, filePath);
        boolean isDoc = DOC_EXTENSION.equalsIgnoreCase(extension);
        boolean isDocx = DOCX_EXTENSION.equalsIgnoreCase(extension);
        if (!isDoc && !isDocx) {
            return UNSUPPORTED_FORMAT;
        }

        File file = urlToFile(url);
        if (file == null) {
            // Only the path is reported: the query string may carry signed credentials.
            throw new IOException("Failed to download document: " + url.getPath());
        }

        try {
            return isDoc ? convertDoc(file) : convertDocx(file);
        } catch (Exception e) {
            // Conversion failures stay unchecked, as callers of the original code expect.
            throw new RuntimeException("Failed to convert document to HTML: " + url.getPath(), e);
        } finally {
            // The temp file is only an intermediate artifact; never leave it on disk.
            if (!file.delete()) {
                file.deleteOnExit();
            }
        }
    }

    /**
     * Downloads the content behind {@code url} into a newly created temporary file.
     *
     * <p>The caller owns the returned file and is responsible for deleting it.
     *
     * @param url location to download from
     * @return the temporary file holding the downloaded bytes, or {@code null} if the
     *         temporary file could not be created or the download failed
     */
    public static File urlToFile(URL url) {
        File file;
        try {
            file = File.createTempFile("tmp", null);
        } catch (IOException e) {
            return null;
        }

        URLConnection connection;
        try {
            connection = url.openConnection();
        } catch (IOException e) {
            discard(file);
            return null;
        }

        try (InputStream in = connection.getInputStream();
             OutputStream out = new FileOutputStream(file)) {
            byte[] buffer = new byte[4096];
            int length;
            while ((length = in.read(buffer)) != -1) {
                out.write(buffer, 0, length);
            }
            return file;
        } catch (IOException e) {
            // Keep the documented "null on failure" contract, but do not leak a
            // partially written temp file.
            discard(file);
            return null;
        }
    }

    /** Returns the extension (including the dot) of the document, or "" if there is none. */
    private static String resolveExtension(URL url, String filePath) {
        // Preferred: last path segment only, which ignores "?query" and "#fragment".
        String path = url.getPath();
        String fileName = path.substring(path.lastIndexOf('/') + 1);
        int dot = fileName.lastIndexOf('.');
        if (dot >= 0) {
            String fromPath = fileName.substring(dot);
            if (DOC_EXTENSION.equalsIgnoreCase(fromPath) || DOCX_EXTENSION.equalsIgnoreCase(fromPath)) {
                return fromPath;
            }
        }
        // Fallback: the original rule (text after the last dot of the whole string), so
        // links that carry the file name elsewhere, e.g. "?file=a.docx", keep working.
        int lastDot = filePath.lastIndexOf('.');
        return lastDot < 0 ? "" : filePath.substring(lastDot);
    }

    /** Converts a binary {@code .doc} file to HTML using POI's WordToHtmlConverter. */
    private static String convertDoc(File file) throws Exception {
        try (InputStream in = new FileInputStream(file);
             HWPFDocument wordDocument = new HWPFDocument(in)) {
            WordToHtmlConverter converter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            converter.processDocument(wordDocument);
            Document htmlDom = converter.getDocument();

            // Serialize the DOM as indented UTF-8 HTML.
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.METHOD, "html");

            ByteArrayOutputStream html = new ByteArrayOutputStream();
            transformer.transform(new DOMSource(htmlDom), new StreamResult(html));
            return html.toString("utf-8");
        }
    }

    /** Converts an OOXML {@code .docx} file to XHTML with images embedded as base64. */
    private static String convertDocx(File file) throws IOException {
        try (InputStream in = new FileInputStream(file);
             XWPFDocument wordDocument = new XWPFDocument(in)) {
            XHTMLOptions options = XHTMLOptions.create();
            // NOTE(review): per the option names, this presumably emits only a body fragment
            // and keeps styles even when unused - confirm against the xdocreport docs.
            options.setFragment(true);
            options.setIgnoreStylesIfUnused(false);
            // Embed images as base64 data instead of writing separate image files.
            options.setImageManager(new Base64EmbedImgManager());

            ByteArrayOutputStream html = new ByteArrayOutputStream();
            XHTMLConverter.getInstance().convert(wordDocument, html, options);
            return html.toString("utf-8");
        }
    }

    /** Best-effort removal of a temp file that will not be handed to the caller. */
    private static void discard(File file) {
        if (!file.delete()) {
            file.deleteOnExit();
        }
    }

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值