Word转Html工具类个人总结

首先引入依赖:
版本:

 <!-- Version properties referenced by the ${...} placeholders in the dependencies below. -->
 <!-- NOTE(review): presumably these entries live inside the pom.xml properties section; confirm placement. -->
 <poi-scratchpad.version>3.14</poi-scratchpad.version>
    <poi-ooxml.version>3.14</poi-ooxml.version>
    <xdocreport.version>1.0.6</xdocreport.version>
    <poi-ooxml-schemas.version>3.14</poi-ooxml-schemas.version>
    <ooxml-schemas.version>1.3</ooxml-schemas.version>
    <jsoup.version>1.11.3</jsoup.version>
<!-- Dependencies for the Word to HTML conversion utility shown after this snippet. -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-scratchpad</artifactId>
      <version>${poi-scratchpad.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml</artifactId>
      <version>${poi-ooxml.version}</version>
    </dependency>
    <dependency>
      <groupId>fr.opensagres.xdocreport</groupId>
      <artifactId>xdocreport</artifactId>
      <version>${xdocreport.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml-schemas</artifactId>
      <version>${poi-ooxml-schemas.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>ooxml-schemas</artifactId>
      <version>${ooxml-schemas.version}</version>
    </dependency>
    <!-- NOTE(review): jsoup is declared here but not imported by the utility class in this post; confirm it is needed. -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>${jsoup.version}</version>
    </dependency>

工具类

package com.tencent.tusi.common.utils;

import com.tencent.tusi.common.utils.minio.MinioUtil;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.xwpf.converter.core.IImageExtractor;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import static com.tencent.tusi.common.constant.PublicConstant.*;

/**
 * Converts Word documents to HTML and hands the generated HTML file to
 * {@code MinioUtil} for upload.
 *
 * <p>Binary {@code .doc} content is parsed with {@link HWPFDocument}; when that
 * parser rejects the bytes with {@link OfficeXmlFileException} the content is
 * treated as OOXML ({@code .docx}) and converted through {@link XHTMLConverter}.
 *
 * <p>Both conversion paths write the HTML to a temporary local file (named by
 * {@code Tools.getRandomHTMLFileName()}), upload it, and always remove the
 * temporary file afterwards.
 */
public class Word2Html {

    private static final Logger LOGGER = Logger.getLogger(Word2Html.class.getName());

    /** Doctype declaration written in front of the converted OOXML markup. */
    private static final String HTML_DOCTYPE = "<!DOCTYPE html>";

    /**
     * Converts a Word document to HTML and uploads the result.
     *
     * @param inputStream raw bytes of the Word document (despite the name, this is a byte array)
     * @return the value returned by {@code MinioUtil.uploadFileByInputStream}
     *         (presumably the location of the uploaded HTML; confirm against MinioUtil)
     * @throws Exception if parsing, conversion or upload fails
     */
    public static String Word2Html(byte[] inputStream) throws Exception {
        HWPFDocument hwpfDocument;
        try {
            // HWPFDocument only understands the binary format of Office 2003 and earlier.
            hwpfDocument = new HWPFDocument(new ByteArrayInputStream(inputStream));
        } catch (OfficeXmlFileException e) {
            // The bytes are OOXML (Office 2007+): re-read them from the start with the OOXML converter.
            return Word2007ToHtml(new ByteArrayInputStream(inputStream));
        }
        return Word2003ToHtml(hwpfDocument);
    }

    /**
     * Converts an already parsed binary Word document to HTML and uploads it.
     * Images are not handled on this path (no picture manager is configured).
     *
     * @param wordDocument the parsed {@code .doc} document
     * @return the value returned by {@code MinioUtil.uploadFileByInputStream}
     * @throws Exception if conversion, serialization or upload fails
     */
    private static String Word2003ToHtml(HWPFDocument wordDocument) throws Exception {
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.processDocument(wordDocument);
        Document htmlDocument = wordToHtmlConverter.getDocument();

        File htmlFile = new File(Tools.getRandomHTMLFileName());
        try {
            // Serialize the DOM to the temporary file; the stream is closed even if transform() throws.
            try (OutputStream outStream = new FileOutputStream(htmlFile)) {
                Transformer serializer = TransformerFactory.newInstance().newTransformer();
                serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
                serializer.setOutputProperty(OutputKeys.INDENT, "yes");
                serializer.setOutputProperty(OutputKeys.METHOD, "html");
                serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
            }

            // Upload the file to the file server.
            try (InputStream uploadStream = Tools.file2InputStream(htmlFile)) {
                return MinioUtil.uploadFileByInputStream(htmlFile, uploadStream, CONTRACT, TEMPLATE_DATA);
            }
        } finally {
            // The file name is random and never exposed to callers, so it is useless after this call.
            deleteTempFile(htmlFile);
        }
    }

    /**
     * Converts an OOXML ({@code .docx}) document to HTML and uploads it.
     * Embedded images are uploaded through {@code MinioUtil.uploadImgSimple} and
     * referenced in the HTML by the value that call returns.
     *
     * @param input stream positioned at the start of the {@code .docx} content
     * @return the value returned by {@code MinioUtil.uploadFileByInputStream},
     *         or an empty string when the upload throws
     * @throws IOException if the document cannot be read or the temporary file cannot be written
     */
    public static String Word2007ToHtml(InputStream input)
            throws IOException {
        XWPFDocument document = new XWPFDocument(input);
        XHTMLOptions options = XHTMLOptions.create();
        // Maps the image path inside the document to the value returned by the image upload.
        final Map<String, String> imgMap = new HashMap<>();

        options.setExtractor(new IImageExtractor() {
            @Override
            public void extract(String imagePath, byte[] imageData) throws IOException {
                LOGGER.fine(imagePath);
                // Only the extension (including the dot) is passed on; tolerate paths without one.
                int dot = imagePath.lastIndexOf('.');
                String suffix = dot >= 0 ? imagePath.substring(dot) : "";
                String url = MinioUtil.uploadImgSimple(imageData, suffix, CONTRACT, TEMPLATE_DATA);
                imgMap.put(imagePath, url);
            }
        });

        // Resolve image references in the generated HTML to the uploaded locations.
        options.URIResolver(new IURIResolver() {
            @Override
            public String resolve(String uri) {
                return imgMap.get(uri);
            }
        });
        options.setIgnoreStylesIfUnused(false);
        options.setFragment(true);

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        XHTMLConverter.getInstance().convert(document, baos, options);

        File file = new File(Tools.getRandomHTMLFileName());
        try {
            // Doctype first, then the converted markup copied verbatim from the buffer.
            try (OutputStream downloadFile = new FileOutputStream(file)) {
                downloadFile.write(HTML_DOCTYPE.getBytes(StandardCharsets.UTF_8));
                baos.writeTo(downloadFile);
            }

            try (InputStream uploadStream = Tools.file2InputStream(file)) {
                try {
                    return MinioUtil.uploadFileByInputStream(file, uploadStream, CONTRACT, TEMPLATE_DATA);
                } catch (Exception e) {
                    // Best effort, as before: an upload failure yields an empty result instead of propagating.
                    LOGGER.log(Level.SEVERE, "Failed to upload converted HTML file " + file, e);
                    return "";
                }
            }
        } finally {
            deleteTempFile(file);
        }
    }

    /** Removes a temporary file, logging (not throwing) when it cannot be deleted. */
    private static void deleteTempFile(File file) {
        if (file.exists() && !file.delete()) {
            LOGGER.warning("Could not delete temporary file " + file);
        }
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值