Word 文件（.doc / .docx）转 .html 工具类：实现在线预览

最新推荐文章于 2023-07-20 20:01:44 发布
风信子的故事
最新推荐文章于 2023-07-20 20:01:44 发布
阅读量357
点赞数
分类专栏：工具类
本文链接：https://blog.csdn.net/bll1992/article/details/118180810
版权
工具类专栏收录该内容
9 篇文章 0 订阅
订阅专栏
package com.cloudsky.utils;

import cn.hutool.core.io.FileTypeUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.IdUtil;
import com.cloudsky.config.Config;
import fr.opensagres.poi.xwpf.converter.core.BasicURIResolver;
import fr.opensagres.poi.xwpf.converter.core.FileImageExtractor;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.stereotype.Component;
import org.springframework.util.ResourceUtils;

import javax.annotation.Resource;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.nio.charset.StandardCharsets;

/**
 * Converts Word documents ({@code .doc} / {@code .docx}) into HTML files so that they
 * can be previewed online.
 *
 * <p>Binary {@code .doc} files are rendered with Apache POI's {@link WordToHtmlConverter};
 * everything else is treated as {@code .docx} and rendered with the opensagres
 * {@link XHTMLConverter}. Embedded pictures are extracted into a separate image directory
 * and referenced from the generated HTML through a relative URI.
 *
 * <p>Two output layouts are supported:
 * <ul>
 *   <li><b>upload layout</b> - {@code <uploadPath>tmp/html/<uuid>.html} with pictures in
 *       {@code <uploadPath>tmp/image/};</li>
 *   <li><b>direct preview layout</b> - {@code <user.dir>/cloudshop-product//doc/html/<name>.html}
 *       with pictures in {@code <user.dir>/cloudshop-product//doc/image/}.</li>
 * </ul>
 *
 * @since 2021-06-18
 */
@Component
public class WordToHtml {
    /** Encoding name handed to the XML serializer; the files are read back as UTF-8 too. */
    private static final String HTML_ENCODING = "utf-8";

    @Resource
    private Config configUtil;

    /**
     * Converts an uploaded Word file to HTML (upload layout) and returns the HTML markup.
     *
     * @param sourceFilePath path of the {@code .doc} / {@code .docx} file to convert
     * @param uploadPath     base directory (expected to end with a path separator) under
     *                       which {@code tmp/html} and {@code tmp/image} are written
     * @return the content of the generated HTML file, or an empty string when the file
     *         type cannot be detected or the conversion fails
     */
    public static String getHtmlStr(String sourceFilePath, String uploadPath) {
        String targetFile;
        try {
            // Type detection is inside the try so that an unreadable source yields ""
            // like every other failure, instead of escaping as a runtime exception.
            targetFile = isLegacyDoc(sourceFilePath)
                    ? docToHtml(sourceFilePath, uploadPath)
                    : docxToHtml(sourceFilePath, uploadPath);
        } catch (Exception e) {
            e.printStackTrace();
            return "";
        }
        return readfile(targetFile);
    }

    /**
     * Converts a Word file to HTML (direct preview layout) and returns the HTML markup.
     *
     * @param sourceFilePath path of the {@code .doc} / {@code .docx} file to convert
     * @return the content of the generated HTML file, or an empty string when the file
     *         type cannot be detected or the conversion fails
     */
    public static String getHtmlStr2(String sourceFilePath) {
        String targetFile;
        try {
            targetFile = isLegacyDoc(sourceFilePath)
                    ? docToHtml(sourceFilePath)
                    : docxToHtml(sourceFilePath);
        } catch (Exception e) {
            e.printStackTrace();
            return "";
        }
        return readfile(targetFile);
    }

    /**
     * Converts a {@code .doc} file using the upload layout.
     *
     * @param sourceFileUrl path of the {@code .doc} file
     * @param uploadPath    base directory of the upload layout
     * @return path of the generated HTML file (its name is a random UUID)
     * @throws Exception if the source cannot be read or the HTML cannot be written
     */
    private static String docToHtml(String sourceFileUrl, String uploadPath) throws Exception {
        String imageDir = uploadPath + "tmp" + File.separator + "image" + File.separator;
        String targetFile = uploadPath + "tmp" + File.separator + "html" + File.separator
                + IdUtil.simpleUUID() + ".html";
        return convertDoc(sourceFileUrl, imageDir, "tmp" + File.separator + "image", targetFile);
    }

    /**
     * Converts a {@code .docx} file using the upload layout.
     *
     * @param sourceFileUrl path of the {@code .docx} file
     * @param uploadPath    base directory of the upload layout
     * @return path of the generated HTML file (its name is a random UUID)
     * @throws Exception if the source cannot be read or the HTML cannot be written
     */
    private static String docxToHtml(String sourceFileUrl, String uploadPath) throws Exception {
        String imageDir = uploadPath + "tmp" + File.separator + "image" + File.separator;
        String targetFile = uploadPath + "tmp" + File.separator + "html" + File.separator
                + IdUtil.simpleUUID() + ".html";
        return convertDocx(sourceFileUrl, imageDir, "tmp" + File.separator + "image", targetFile);
    }

    /**
     * Converts a {@code .doc} file using the direct preview layout.
     *
     * <p>The HTML file is named after the part of the source file name that precedes its
     * first dot, so converting two sources with the same base name overwrites the output.
     *
     * @param sourceFileUrl path of the {@code .doc} file
     * @return path of the generated HTML file
     * @throws Exception if the source cannot be read or the HTML cannot be written
     */
    public static String docToHtml(String sourceFileUrl) throws Exception {
        String root = previewRoot();
        String targetFile = root + "html" + File.separator + baseName(sourceFileUrl) + ".html";
        return convertDoc(sourceFileUrl, root + "image" + File.separator,
                "doc" + File.separator + "image", targetFile);
    }

    /**
     * Converts a {@code .docx} file using the direct preview layout.
     *
     * <p>The HTML file is named after the part of the source file name that precedes its
     * first dot, so converting two sources with the same base name overwrites the output.
     *
     * @param sourceFileUrl path of the {@code .docx} file
     * @return path of the generated HTML file
     * @throws Exception if the source cannot be read or the HTML cannot be written
     */
    public static String docxToHtml(String sourceFileUrl) throws Exception {
        String root = previewRoot();
        String targetFile = root + "html" + File.separator + baseName(sourceFileUrl) + ".html";
        return convertDocx(sourceFileUrl, root + "image" + File.separator,
                "doc" + File.separator + "image", targetFile);
    }

    /** Tells whether hutool's content-based detection reports the file as binary {@code doc}. */
    private static boolean isLegacyDoc(String sourceFilePath) {
        return "doc".equals(FileTypeUtil.getType(FileUtil.file(sourceFilePath)));
    }

    /** Returns the root directory of the direct preview layout, ending with a separator. */
    private static String previewRoot() {
        // The doubled separator after "cloudshop-product/" is kept on purpose: the
        // resulting string is returned to callers and must not change.
        return System.getProperty("user.dir") + File.separator + "cloudshop-product/"
                + File.separator + "doc" + File.separator;
    }

    /** Returns the part of the (trimmed) path's file name that precedes its first dot. */
    private static String baseName(String sourceFileUrl) {
        String name = new File(sourceFileUrl.trim()).getName();
        int firstDot = name.indexOf('.');
        return firstDot < 0 ? name : name.substring(0, firstDot);
    }

    /** Creates the directory (and missing parents) or fails loudly when that is impossible. */
    private static void ensureDirectory(File dir) throws IOException {
        if (dir != null && !dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Unable to create directory: " + dir);
        }
    }

    /**
     * Renders a binary {@code .doc} file to {@code targetFile}, extracting pictures on the way.
     *
     * @param sourceFile   path of the {@code .doc} file
     * @param imageDir     directory (ending with a separator) that receives the pictures
     * @param imageUriBase relative URI prefix, without trailing separator, used in the HTML
     * @param targetFile   path of the HTML file to write
     * @return {@code targetFile}
     * @throws Exception if reading, converting or writing fails; nothing is swallowed so
     *                   callers never receive the path of a missing or partial file
     */
    private static String convertDoc(String sourceFile, String imageDir, String imageUriBase,
                                     String targetFile) throws Exception {
        ensureDirectory(new File(imageDir));
        ensureDirectory(new File(targetFile).getParentFile());

        try (InputStream source = new FileInputStream(sourceFile);
             HWPFDocument wordDocument = new HWPFDocument(source)) {
            org.w3c.dom.Document shell =
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            WordToHtmlConverter converter = new WordToHtmlConverter(shell);
            // Store each picture on disk and hand its relative URI back to the converter.
            converter.setPicturesManager((content, pictureType, name, width, height) -> {
                try (FileOutputStream imageOut = new FileOutputStream(imageDir + name)) {
                    imageOut.write(content);
                } catch (Exception e) {
                    // Best effort: a picture that cannot be saved must not abort the preview.
                    e.printStackTrace();
                }
                return imageUriBase + File.separator + name;
            });
            converter.processDocument(wordDocument);

            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, HTML_ENCODING);
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            // Writing through an explicit stream guarantees the file handle is released.
            try (OutputStream htmlOut = new FileOutputStream(targetFile)) {
                serializer.transform(new DOMSource(converter.getDocument()),
                        new StreamResult(htmlOut));
            }
        }
        return targetFile;
    }

    /**
     * Renders a {@code .docx} file to {@code targetFile}, extracting pictures on the way.
     *
     * @param sourceFile   path of the {@code .docx} file
     * @param imageDir     directory that receives the extracted pictures
     * @param imageUriBase relative URI prefix used for pictures in the HTML
     * @param targetFile   path of the HTML file to write
     * @return {@code targetFile}
     * @throws Exception if reading, converting or writing fails
     */
    private static String convertDocx(String sourceFile, String imageDir, String imageUriBase,
                                      String targetFile) throws Exception {
        ensureDirectory(new File(targetFile).getParentFile());

        try (InputStream source = new FileInputStream(sourceFile);
             XWPFDocument document = new XWPFDocument(source);
             Writer htmlOut = new OutputStreamWriter(
                     new FileOutputStream(targetFile), StandardCharsets.UTF_8)) {
            XHTMLOptions options = XHTMLOptions.create();
            // Directory that receives the extracted pictures.
            options.setExtractor(new FileImageExtractor(new File(imageDir)));
            // Path used for the pictures inside the generated HTML.
            options.URIResolver(new BasicURIResolver(imageUriBase));
            XHTMLConverter converter = (XHTMLConverter) XHTMLConverter.getInstance();
            converter.convert(document, htmlOut, options);
        }
        return targetFile;
    }

    /**
     * Reads a generated HTML file back into memory.
     *
     * <p>The content is returned verbatim, line terminators included, so that text on
     * adjacent lines is never fused together.
     *
     * @param filePath path of the HTML file
     * @return the file content decoded as UTF-8 (the encoding the converters write);
     *         whatever was read so far - possibly an empty string - if reading fails
     */
    private static String readfile(String filePath) {
        StringBuilder html = new StringBuilder();
        try (Reader reader = new InputStreamReader(
                new FileInputStream(filePath), StandardCharsets.UTF_8)) {
            char[] chunk = new char[8192];
            int read;
            while ((read = reader.read(chunk)) != -1) {
                html.append(chunk, 0, read);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return html.toString();
    }
}
风信子的故事
关注
0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
Word 文件（.doc / .docx）转 .html 工具类：实现在线预览

package com.cloudsky.utils;import cn.hutool.core.io.FileTypeUtil;import cn.hutool.core.io.FileUtil;import cn.hutool.core.util.IdUtil;import com.cloudsky.config.Config;import fr.opensagres.poi.xwpf.converter.core.BasicURIResolver;import fr.opensagre.
复制链接

扫一扫