java word和pdf文件生成html

最新推荐文章于 2022-12-16 17:05:44 发布

qq_20076823

最新推荐文章于 2022-12-16 17:05:44 发布

阅读量371

点赞数

本文链接：https://blog.csdn.net/qq_20076823/article/details/106548037

版权

Java word转html 同时被 2 个专栏收录

1 篇文章 0 订阅

订阅专栏

Java pdf转html

1 篇文章 0 订阅

订阅专栏

依赖包如下：
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>1.0.5</version>
</dependency>

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.5</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.12</version>
</dependency>

<dependency>
    <groupId>e-iceblue</groupId>
    <artifactId>spire.pdf</artifactId>
    <version>3.4.2</version>
</dependency>
<dependency>
    <groupId>e-iceblue</groupId>
    <artifactId>spire.pdf.free</artifactId>
    <version>2.6.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.11.3</version>
</dependency>

<repositories>
    <repository>
        <id>com.e-iceblue</id>
        <url>https://repo.e-iceblue.cn/repository/maven-public/</url>
    </repository>
</repositories>

package com.vpclub.common.utils;
import com.spire.pdf.FileFormat;
import com.spire.pdf.PdfDocument;
import com.vpclub.common.exception.ErrorCode;
import com.vpclub.common.exception.RenException;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.jsoup.Jsoup;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.ClassPathResource;
import org.springframework.util.StringUtils;
import org.springframework.web.multipart.MultipartFile;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.UUID;

/**
 * Utility methods that convert uploaded Word (.docx) and PDF files into HTML markup.
 *
 * @author zhao.xx
 * @since 2020/6/3
 */
public class FileToHtmlUtils {

    private static final Logger logger = LoggerFactory.getLogger(FileToHtmlUtils.class);

    /**
     * Converts a Word document into an HTML fragment.
     *
     * <p>Images embedded in the document are extracted into {@code filePath} and the
     * generated markup references them through a {@link FileURIResolver} rooted there.
     *
     * @param file     the uploaded Word document
     * @param filePath directory in which extracted images are stored; when empty or
     *                 {@code null}, a {@code temp} folder under the classpath root is used
     * @return the generated HTML fragment
     * @throws RenException if the document cannot be read or converted
     */
    public static String docToHtml(MultipartFile file, String filePath) {
        // try-with-resources guarantees the upload stream is released even when conversion fails.
        try (InputStream in = file.getInputStream();
             ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
            File imageFolderFile = new File(resolveWorkDir(filePath));
            XWPFDocument document = new XWPFDocument(in);

            // The URI resolver decides how image links are written; the extractor decides
            // where the image bytes are saved. Both point at the same folder.
            XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));
            options.setExtractor(new FileImageExtractor(imageFolderFile));
            options.setIgnoreStylesIfUnused(false);
            // Emit only a fragment instead of a full <html> document.
            options.setFragment(true);

            XHTMLConverter.getInstance().convert(document, baos, options);
            // NOTE(review): decodes with the platform default charset, exactly as before;
            // confirm the converter's output encoding before pinning an explicit charset.
            return baos.toString();
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
            throw new RenException(ErrorCode.INTERNAL_SERVER_ERROR, "文件转换错误，请联系管理员");
        }
    }

    /**
     * Converts a PDF document into HTML and returns the children of the resulting
     * {@code <body>} element as a string.
     *
     * <p>The PDF is first rendered to a temporary {@code <uuid>.html} file inside
     * {@code filePath}; that file is parsed and then always removed, whether or not the
     * conversion succeeded.
     *
     * @param file     the uploaded PDF document
     * @param filePath directory used for the temporary HTML file; when empty or
     *                 {@code null}, a {@code temp} folder under the classpath root is used
     * @return the HTML markup of the converted document body
     * @throws RenException if the document cannot be read or converted
     */
    public static String pdfToHtml(MultipartFile file, String filePath) {
        File htmlFile = null;
        PdfDocument pdf = null;
        try (InputStream in = file.getInputStream()) {
            File workDir = new File(resolveWorkDir(filePath));
            // The working folder may not exist yet (the default "temp" folder usually does not).
            if (!workDir.mkdirs() && !workDir.isDirectory()) {
                throw new IOException("Cannot create working directory: " + workDir);
            }

            // Build the path with File(parent, child) so a separator is always inserted;
            // plain concatenation produced ".../temp<uuid>.html" next to the folder.
            String uuid = UUID.randomUUID().toString().replace("-", "");
            htmlFile = new File(workDir, uuid + ".html");

            // Render the PDF to an intermediate HTML file.
            pdf = new PdfDocument(in);
            pdf.saveToFile(htmlFile.getPath(), FileFormat.HTML);

            // Extract the body content from the generated HTML file.
            Elements doc = Jsoup.parse(htmlFile, "UTF-8").body().children();
            return doc.toString();
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
            throw new RenException(ErrorCode.INTERNAL_SERVER_ERROR, "文件转换错误，请联系管理员");
        } finally {
            if (pdf != null) {
                pdf.close();
            }
            // Clean up the intermediate file on both success and failure paths.
            if (htmlFile != null && htmlFile.exists() && !htmlFile.delete()) {
                logger.warn("Failed to delete temporary file {}", htmlFile);
            }
        }
    }

    /**
     * Returns the caller-supplied working directory, or the default {@code temp} folder
     * under the classpath root when none was supplied.
     */
    private static String resolveWorkDir(String filePath) throws IOException {
        if (StringUtils.isEmpty(filePath)) {
            return new ClassPathResource("").getFile().getAbsolutePath() + "/temp";
        }
        return filePath;
    }

}

qq_20076823

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
java word和pdf文件生成html

依赖包如下：<dependency> <groupId>fr.opensagres.xdocreport</groupId> <artifactId>fr.opensagres.xdocreport.document</artifactId> <version>1.0.5</version></dependency><dependency> <gr.
复制链接

扫一扫