JAVA docx/doc转html代码

最新推荐文章于 2024-08-01 10:57:13 发布

丶spare

最新推荐文章于 2024-08-01 10:57:13 发布

阅读量1.4k

点赞数

分类专栏： java 文章标签： poi java doc/docx转html

本文链接：https://blog.csdn.net/weixin_42612454/article/details/117250588

版权

java 专栏收录该内容

10 篇文章 1 订阅

订阅专栏

项目中有一个需求：把 doc/docx 文档转换成 html 代码，再渲染到前端。在此之前我只是大概了解 poi 具备这方面的能力，并没有实际操作过。实现过程中参考了很多教程，也踩了不少坑，但时隔太久，具体细节已经记不清了。话不多说，直接贴代码。

pom.xml引入相关jar包

<!-- xdocreport XHTML converter: its artifact id matches the org.apache.poi.xwpf.converter.xhtml
     package that the utility class imports XHTMLConverter/XHTMLOptions from (docx -> html). -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.6</version>
</dependency>
<!-- OOXML schema classes; presumably required at runtime by the docx (XWPF) conversion.
     NOTE(review): confirm that version 1.4 is compatible with the POI 3.10.1 artifacts below. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.4</version>
</dependency>
<!-- Apache POI core. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.10.1</version>
</dependency>
<!-- POI scratchpad; presumably the source of the org.apache.poi.hwpf classes
     (HWPFDocument, WordToHtmlConverter) used for the legacy .doc conversion. Verify. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.10.1</version>
</dependency>

工具类

package com.common.comm.util;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import javax.servlet.http.HttpServletRequest;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.List;

/**
 * Utility methods that convert Word documents stored on disk into HTML markup.
 *
 * <p>Both methods build file locations by plain string concatenation
 * ({@code path + fileName}), so {@code path} is expected to end with a path separator.
 * Images embedded in the document are extracted to sub-folders of {@code path}, and the
 * generated HTML references them under the {@code /files/news/} URL prefix.
 */
public class DocToHtml {
    private static final String ENCODING = "UTF-8";

    /**
     * Converts a legacy binary Word document (.doc) to HTML.
     *
     * <p>Embedded pictures are written to {@code path + "temp/" + fileName + "/"} and are
     * referenced in the HTML through an absolute URL built from the scheme, server name and
     * port of {@code request}, followed by {@code /files/news/temp/<fileName>/<pictureName>}.
     *
     * @param path     folder containing the document (must end with a path separator)
     * @param fileName name of the .doc file
     * @param request  current request; used only to build the absolute image URLs
     * @return the generated HTML, or an empty string when {@code path}/{@code fileName} is
     *         null or empty, or when the file does not exist or is not a regular file
     * @throws TransformerException         if serializing the converted DOM to HTML fails
     * @throws IOException                  if reading the document or writing an image fails
     * @throws ParserConfigurationException if no DOM document builder can be created
     */
    public static String docToHtml(String path, String fileName, HttpServletRequest request)
            throws TransformerException, IOException,
            ParserConfigurationException {
        if (path == null || "".equals(path) || fileName == null || "".equals(fileName)) {
            return "";
        }
        File file = new File(path + fileName);
        if (!file.exists() || !file.isFile()) {
            return "";
        }

        // Close the input stream as soon as the document has been constructed from it.
        HWPFDocument wordDocument;
        try (InputStream is = new FileInputStream(file)) {
            wordDocument = new HWPFDocument(is);
        }

        WordToHtmlConverter converter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());

        // Build "<scheme>://<host>[:port]"; the default ports 80 and 443 are omitted.
        String head = request.getRequestURL().toString().startsWith("https") ? "https://" : "http://";
        int serverPort = request.getServerPort();
        String port = (serverPort == 80 || serverPort == 443) ? "" : (":" + serverPort);
        String domain = head + request.getServerName() + port;

        // The URL must mirror the on-disk layout used below (path + "temp/" + fileName + "/"),
        // hence the "/" after "temp".
        converter.setPicturesManager((content, pictureType, suggestedName, widthInches, heightInches) ->
                domain + "/files/news/temp/" + fileName + "/" + suggestedName);
        converter.processDocument(wordDocument);

        // Extract every embedded picture next to the document.
        String basePath = path + "temp/" + fileName + "/";
        File dir = new File(basePath);
        if (!dir.exists()) {
            dir.mkdirs();
        }
        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        for (Picture pic : pics) {
            // try-with-resources so that each image file handle is released.
            try (OutputStream imageOut = new FileOutputStream(basePath + pic.suggestFullFileName())) {
                pic.writeImageContent(imageOut);
            } catch (FileNotFoundException e) {
                // Best effort: a picture whose target file cannot be created is skipped.
                e.printStackTrace();
            }
        }

        // Serialize the converted DOM as indented HTML.
        StringWriter writer = new StringWriter();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, ENCODING);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(
                new DOMSource(converter.getDocument()),
                new StreamResult(writer));
        return writer.toString();
    }

    /**
     * Converts an OOXML Word document (.docx) to HTML.
     *
     * <p>The converter writes to a temporary file {@code path + "temp/" + fileName + ".html"},
     * which is read back (line terminators are dropped) and always deleted afterwards.
     * Images are extracted to {@code path + "/images/" + fileName}. Every occurrence of
     * {@code path} in the generated HTML is replaced with {@code /files/news/}.
     *
     * @param path     folder containing the document (must end with a path separator)
     * @param fileName name of the .docx file
     * @return the generated HTML, or an empty string when {@code path} or {@code fileName}
     *         is null or empty
     * @throws Throwable if the document cannot be read or converted, or if the temporary
     *                   file cannot be written or read back
     */
    public static String docxToHtml(String path, String fileName) throws Throwable {
        // Same argument guard as docToHtml; an empty path would also corrupt the final replace().
        if (path == null || "".equals(path) || fileName == null || "".equals(fileName)) {
            return "";
        }

        File htmlFile = new File(path + "temp/" + fileName + ".html");
        // Make sure the folder for the temporary HTML file exists before opening it.
        File outDir = htmlFile.getParentFile();
        if (outDir != null && !outDir.exists()) {
            outDir.mkdirs();
        }

        XHTMLOptions options = XHTMLOptions.create();
        // Extract images
        File imageFolder = new File(path + "/images/" + fileName);
        options.setExtractor(new FileImageExtractor(imageFolder));
        // URI resolver
        options.URIResolver(new FileURIResolver(imageFolder));

        StringBuilder content = new StringBuilder();
        try {
            // Both streams are closed (and the output flushed) before the file is read back.
            try (InputStream in = new FileInputStream(path + fileName);
                 OutputStream out = new FileOutputStream(htmlFile)) {
                XWPFDocument document = new XWPFDocument(in);
                XHTMLConverter.getInstance().convert(document, out, options);
            }
            // NOTE(review): FileReader uses the platform default charset; confirm which charset
            // XHTMLConverter writes with before switching this to an explicit one.
            try (BufferedReader reader = new BufferedReader(new FileReader(htmlFile))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    content.append(line);
                }
            }
        } finally {
            // The temporary file is removed even when conversion or reading fails.
            htmlFile.delete();
        }
        return content.toString().replace(path, "/files/news/");
    }
}