依赖包如下:
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>fr.opensagres.xdocreport.document</artifactId>
<version>1.0.5</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
<version>1.0.5</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.12</version>
</dependency>
<dependency>
<groupId>e-iceblue</groupId>
<artifactId>spire.pdf</artifactId>
<version>3.4.2</version>
</dependency>
<dependency>
<groupId>e-iceblue</groupId>
<artifactId>spire.pdf.free</artifactId>
<version>2.6.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
<repositories>
<repository>
<id>com.e-iceblue</id>
<url>http://repo.e-iceblue.cn/repository/maven-public/</url>
</repository>
</repositories>
package com.vpclub.common.utils;
import com.spire.pdf.FileFormat;
import com.spire.pdf.PdfDocument;
import com.vpclub.common.exception.ErrorCode;
import com.vpclub.common.exception.RenException;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.jsoup.Jsoup;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.ClassPathResource;
import org.springframework.util.StringUtils;
import org.springframework.web.multipart.MultipartFile;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.UUID;
/**
 * Utility methods that turn uploaded documents (Word .docx and PDF) into HTML text.
 *
 * @Author: zhao.xx
 * @Date: 2020/6/3
 */
public class FileToHtmlUtils {
    private static final Logger logger = LoggerFactory.getLogger(FileToHtmlUtils.class);

    /** User-facing message attached to every conversion failure. */
    private static final String CONVERT_ERROR_MESSAGE = "文件转换错误,请联系管理员";

    /**
     * Converts a Word document into an XHTML fragment.
     *
     * @param file     the uploaded Word document
     * @param filePath directory handed to the converter for extracted images; when empty or
     *                 {@code null}, the classpath root directory plus {@code "/temp"} is used
     * @return the generated XHTML fragment as a string
     * @throws RenException if anything goes wrong while reading or converting the document
     */
    public static String docToHtml(MultipartFile file, String filePath) {
        // try-with-resources guarantees the upload stream is released even when conversion fails.
        try (InputStream in = file.getInputStream();
             ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
            File imageFolderFile = new File(resolveDirectory(filePath));
            XWPFDocument document = new XWPFDocument(in);

            // XHTML options: the URI resolver and the image extractor both point at the image folder.
            XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));
            options.setExtractor(new FileImageExtractor(imageFolderFile));
            options.setIgnoreStylesIfUnused(false);
            options.setFragment(true);

            // Convert the XWPFDocument to XHTML, collecting the output in memory.
            XHTMLConverter.getInstance().convert(document, baos, options);

            // Decoded with the platform default charset.
            // NOTE(review): confirm this matches the encoding the converter writes with.
            return baos.toString();
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
            throw new RenException(ErrorCode.INTERNAL_SERVER_ERROR, CONVERT_ERROR_MESSAGE);
        }
    }

    /**
     * Converts a PDF document into an HTML string.
     *
     * <p>The PDF is first saved as a temporary HTML file inside the working directory, the children
     * of its {@code <body>} element are extracted, and the temporary file is then removed whether
     * or not parsing succeeded.
     *
     * @param file     the uploaded PDF document
     * @param filePath directory in which the temporary HTML file is created; when empty or
     *                 {@code null}, the classpath root directory plus {@code "/temp"} is used
     * @return the markup of the children of the generated HTML body
     * @throws RenException if anything goes wrong while reading or converting the document
     */
    public static String pdfToHtml(MultipartFile file, String filePath) {
        File htmlFile = null;
        try (InputStream in = file.getInputStream()) {
            File directory = new File(resolveDirectory(filePath));
            if (!directory.isDirectory() && !directory.mkdirs()) {
                throw new IOException("Unable to create directory " + directory);
            }

            // Build the path with File(parent, child) so a separator is always present between
            // the directory and the generated file name.
            String uuid = UUID.randomUUID().toString().replace("-", "");
            htmlFile = new File(directory, uuid + ".html");

            // Save the PDF as an HTML file, releasing the document as soon as it has been written.
            PdfDocument pdf = new PdfDocument(in);
            try {
                pdf.saveToFile(htmlFile.getPath(), FileFormat.HTML);
            } finally {
                pdf.close();
            }

            // Read the body content back from the generated HTML file.
            Elements bodyChildren = Jsoup.parse(htmlFile, "UTF-8").body().children();
            return bodyChildren.toString();
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
            throw new RenException(ErrorCode.INTERNAL_SERVER_ERROR, CONVERT_ERROR_MESSAGE);
        } finally {
            // Remove the temporary file on every path, not only after a successful parse.
            if (htmlFile != null && htmlFile.exists() && !htmlFile.delete()) {
                logger.warn("Failed to delete temporary HTML file {}", htmlFile);
            }
        }
    }

    /**
     * Returns the caller-supplied directory, or the default one when none was given.
     *
     * @param filePath directory requested by the caller, may be empty or {@code null}
     * @return {@code filePath} when non-empty, otherwise the classpath root path plus {@code "/temp"}
     * @throws IOException if the classpath root cannot be resolved to a file system location
     */
    private static String resolveDirectory(String filePath) throws IOException {
        if (StringUtils.isEmpty(filePath)) {
            return new ClassPathResource("").getFile().getAbsolutePath() + "/temp";
        }
        return filePath;
    }
}