实现DOC、DOCX转换为PDF 再将PDF转换为图片
首先导入需要的依赖(下文代码还用到了 Lombok、Guava 和 commons-lang3,如果项目中尚未引入,也需要一并添加):
<!-- Hutool utility library; the Java code below uses CollUtil, SystemUtil/OsInfo and ObjectUtil from it -->
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.4.7</version>
</dependency>
<!-- PDF conversion tools -->
<dependency>
<groupId>net.sf.cssbox</groupId>
<artifactId>pdf2dom</artifactId>
<version>1.7</version>
</dependency>
<!-- PDFBox: PdfUtil uses PDDocument and PDFRenderer to rasterize PDF pages -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.12</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
<version>2.0.12</version>
</dependency>
<!-- iText: PdfUtil imports com.lowagie.text.pdf.PdfReader from this artifact -->
<dependency>
<groupId>com.lowagie</groupId>
<artifactId>itext</artifactId>
<version>2.0.7</version>
</dependency>
<!-- Document conversion tool (Aspose.Words) -->
<!-- system scope: the jar is not resolved from a repository; it is read from the lib/ folder under the project base directory -->
<dependency>
<groupId>com.aspose</groupId>
<artifactId>aspose-words</artifactId>
<version>15.8.0</version>
<scope>system</scope>
<systemPath>${basedir}/lib/aspose-words-15.8.0-jdk16.jar</systemPath>
</dependency>
其中aspose-words-15.8.0-jdk16.jar需要从外部引入:链接:https://pan.baidu.com/s/1eqMR_6lvt8HHIAxTvs09fA 提取码:o5ct
在资源目录(classpath 根目录)下添加 License.xml(文件名必须与代码中加载的 "License.xml" 一致):
<License>
<Data>
<Products>
<Product>Aspose.Total for Java</Product>
<Product>Aspose.Words for Java</Product>
</Products>
<EditionType>Enterprise</EditionType>
<SubscriptionExpiry>20991231</SubscriptionExpiry>
<LicenseExpiry>20991231</LicenseExpiry>
<SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber>
</Data>
<Signature>
sNLLKGMUdF0r8O1kKilWAGdgfs2BvJb/2Xp8p5iuDVfZXmhppo+d0Ran1P9TKdjV4ABwAgKXxJ3jcQTqE/2IRfqwnPf8itN8aFZlV3TJPYeD3yWE7IT55Gz6EijUpC7aKeoohTb4w2fpox58wWoF3SNp6sK6jDfiAUGEHYJ9pjU=
</Signature>
</License>
将word转换为PDF的工具类
import cn.hutool.core.collection.CollUtil;
import cn.hutool.system.OsInfo;
import cn.hutool.system.SystemUtil;
import com.aspose.words.Document;
import com.aspose.words.FontSettings;
import com.aspose.words.License;
import com.aspose.words.SaveFormat;
import lombok.extern.slf4j.Slf4j;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.Arrays;
/**
 * Converts office documents to PDF using Aspose.Words.
 *
 * <p>Only word-processing formats (see {@link #WORD}) are actually converted;
 * every other recognised or unrecognised type yields an empty byte array.
 */
@Slf4j
public class AsposeUtil {

    // Extension tables. getType() lower-cases the suffix before the lookup,
    // so every entry here must be lower-case to be matchable.
    private static final String[] WORD = {"doc", "docx", "wps", "wpt", "txt"};
    private static final String[] EXCEL = {"xls", "xlsx", "et", "xlsm"};
    private static final String[] PPT = {"ppt", "pptx"};
    private static final String[] PDF = {"pdf"};
    private static final String[] IMG = {"bmp", "jpg", "png", "tif", "gif", "pcx", "tga", "exif", "fpx", "svg", "psd", "cdr", "pcd", "dxf", "ufo", "eps", "ai", "raw", "wmf", "webp", "avif", "apng"};

    private static final String TYPE_UNSUPPORT = "不支持的格式";
    private static final String TYPE_WORD = "TYPE_WORD";
    private static final String TYPE_EXCEL = "TYPE_EXCEL";
    private static final String TYPE_PPT = "TYPE_PPT";
    private static final String TYPE_PDF = "TYPE_PDF";
    private static final String TYPE_IMG = "TYPE_IMG";

    /**
     * Loads {@code License.xml} from the classpath root and applies it to Aspose.
     *
     * @return {@code true} when {@code License.setLicense} completed without throwing,
     *         {@code false} otherwise (the failure is logged)
     */
    private boolean judgeLicense() {
        boolean result = false;
        // try-with-resources closes the stream; a null resource is permitted and skipped on close.
        // NOTE(review): when the resource is missing, null is handed to Aspose unchanged,
        // exactly as before - what Aspose does with a null stream should be confirmed.
        try (InputStream is = AsposeUtil.class.getClassLoader().getResourceAsStream("License.xml")) {
            License aposeLic = new License();
            aposeLic.setLicense(is);
            result = true;
        } catch (Exception e) {
            log.error("Aspose License 文档转换失败!", e);
        }
        return result;
    }

    /**
     * Determines the document category from the file name's extension.
     *
     * @param fileName file name; may be {@code null}
     * @return one of the {@code TYPE_*} constants, {@link #TYPE_UNSUPPORT} when the
     *         extension is unknown or {@code fileName} is {@code null}
     */
    private String getType(String fileName) {
        if (fileName == null) {
            return TYPE_UNSUPPORT;
        }
        // Locale.ROOT keeps the lower-casing independent of the JVM default locale
        // (e.g. "GIF" must become "gif" even under a Turkish locale).
        String suffix = fileName.substring(fileName.lastIndexOf(".") + 1).toLowerCase(java.util.Locale.ROOT);
        if (CollUtil.contains(Arrays.asList(WORD), suffix)) {
            return TYPE_WORD;
        }
        if (CollUtil.contains(Arrays.asList(EXCEL), suffix)) {
            return TYPE_EXCEL;
        }
        if (CollUtil.contains(Arrays.asList(PPT), suffix)) {
            return TYPE_PPT;
        }
        if (CollUtil.contains(Arrays.asList(PDF), suffix)) {
            return TYPE_PDF;
        }
        if (CollUtil.contains(Arrays.asList(IMG), suffix)) {
            return TYPE_IMG;
        }
        return TYPE_UNSUPPORT;
    }

    public static void main(String[] args) throws Exception {
    }

    /**
     * Converts a document to PDF.
     *
     * @param fileName name of the file; only its extension is used, to pick the converter
     * @param in       stream with the document content; it is not closed by this method
     * @return the PDF bytes, or an empty array when the type is not a word-processing
     *         format or the conversion fails (the failure is logged)
     * @throws Exception when the Aspose license cannot be applied
     */
    public byte[] toPdf(String fileName, InputStream in) throws Exception {
        if (!judgeLicense()) {
            throw new IllegalStateException("Aspose License 加载失败,无法转换文档: " + fileName);
        }
        String type = getType(fileName);
        if (!TYPE_WORD.equals(type)) {
            // Excel / PPT / PDF / image inputs are recognised but not converted here.
            log.warn("{}: {}", TYPE_UNSUPPORT, fileName);
            return new byte[0];
        }
        try (ByteArrayOutputStream tmp = wordToPdfStream(in)) {
            return tmp.toByteArray();
        } catch (Exception e) {
            // Best effort by design: callers receive an empty array instead of the exception.
            log.error("word转换pdf失败!", e);
        }
        return new byte[0];
    }

    /**
     * Loads a Word document from the stream and renders it to an in-memory PDF.
     *
     * @param in document content
     * @return buffer holding the generated PDF
     * @throws Exception when Aspose fails to load or save the document
     */
    private ByteArrayOutputStream wordToPdfStream(InputStream in) throws Exception {
        Document doc = new Document(in);
        OsInfo osInfo = SystemUtil.getOsInfo();
        if (osInfo.isLinux()) {
            // TODO: on Linux the fonts have to be installed into this folder beforehand,
            // e.g. by copying the Windows fonts (C:\Windows\Fonts) there.
            FontSettings.setFontsFolder("/usr/share/fonts/chinese", true);
        }
        ByteArrayOutputStream dstStream = new ByteArrayOutputStream();
        doc.save(dstStream, SaveFormat.PDF);
        return dstStream;
    }

    // Sketches for Excel / PPT conversion; they need additional Aspose products
    // that are not among the imports of this class.
    //private ByteArrayOutputStream excelToPdfStream(InputStream in) throws Exception {
    //    Workbook excel = new Workbook(in);
    //    ByteArrayOutputStream dstStream = new ByteArrayOutputStream();
    //    excel.save(dstStream, SaveFormat.PDF);
    //    return dstStream;
    //}
    //
    //private ByteArrayOutputStream pptToPdfStream(InputStream in) throws Exception {
    //    Presentation ppt = new Presentation (in);
    //    ByteArrayOutputStream dstStream = new ByteArrayOutputStream();
    //    ppt.save(dstStream, SaveFormat.PDF);
    //    return dstStream;
    //}
}
将PDF转换为图片的工具类
import cn.hutool.core.util.ObjectUtil;
import com.google.common.collect.Lists;
import com.lowagie.text.pdf.PdfReader;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.List;
@Slf4j
public class PdfUtil {
public static final int DEFAULT_DPI = 150;
/***
* PDF文件转PNG图片,全部页数
*
* @param pdfFilePath pdf完整路径
* @param dpi dpi越大转换后越清晰,相对转换速度越慢
*/
public static void pdf2Image(String pdfFilePath, int dpi) {
File file = new File(pdfFilePath);
PDDocument pdDocument;
try {
String imgPdfPath = file.getParent();
int dot = file.getName().lastIndexOf('.');
// 获取图片文件名
String imagePdfName = file.getName().substring(0, dot);
pdDocument = PDDocument.load(file);
PDFRenderer renderer = new PDFRenderer(pdDocument);
/* dpi越大转换后越清晰,相对转换速度越慢 */
PdfReader reader = new PdfReader(pdfFilePath);
int pages = reader.getNumberOfPages();
StringBuffer imgFilePath;
for (int i = 0; i < pages; i++) {
String imgFilePathPrefix = imgPdfPath + File.separator + imagePdfName;
imgFilePath = new StringBuffer();
imgFilePath.append(imgFilePathPrefix);
imgFilePath.append("_");
imgFilePath.append((i + 1));
imgFilePath.append(".png");
File dstFile = new File(imgFilePath.toString());
BufferedImage image = renderer.renderImageWithDPI(i, dpi);
ImageIO.write(image, "png", dstFile);
}
log.info("PDF文档转PNG图片成功!");
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* pdf转图片
* 多页PDF会每页转换为一张图片,下面会有多页组合成一页的方法
* @param pdfFile pdf文件路径
* @param outPath 图片输出路径
* @param dpi 相当于图片的分辨率,值越大越清晰,但是转换时间变长
*/
public static void pdf2multiImageFile(String pdfFile, String outPath, int dpi) {
if (ObjectUtil.isEmpty(dpi)) {
// 如果没有设置DPI,默认设置为150
dpi = DEFAULT_DPI;
}
try (PDDocument pdf = PDDocument.load(new FileInputStream(pdfFile))) {
int actSize = pdf.getNumberOfPages();
List<BufferedImage> picList = Lists.newArrayList();
for (int i = 0; i < actSize; i++) {
BufferedImage image = new PDFRenderer(pdf).renderImageWithDPI(i, dpi, ImageType.RGB);
picList.add(image);
}
// 组合图片
ImageUtil.combinationImages2File(picList, outPath);
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* pdf转图片
* 多页PDF会每页转换为一张图片,下面会有多页组合成一页的方法
* @param pdfBytes pdf文件字节数组
* @param dpi 相当于图片的分辨率,值越大越清晰,但是转换时间变长
*/
public static byte[] pdfBytes2multiImageBytes(byte[] pdfBytes, int dpi) {
if (ArrayUtils.isEmpty(pdfBytes)) {
return new byte[0];
}
if (dpi <= 0) {
// 如果没有设置DPI,默认设置为150
dpi = DEFAULT_DPI;
}
try (PDDocument pdf = PDDocument.load(pdfBytes)) {
int actSize = pdf.getNumberOfPages();
List<BufferedImage> picList = Lists.newArrayList();
PDFRenderer renderer = new PDFRenderer(pdf);
for (int i = 0; i < actSize; i++) {
BufferedImage image = renderer.renderImageWithDPI(i, dpi, ImageType.RGB);
picList.add(image);
}
// 组合图片
return ImageUtil.combinationImages2Bytes(picList);
} catch (IOException e) {
log.error("pdf解析失败!", e);
}
return new byte[0];
}
}
实现多张图片组合的工具类
import lombok.extern.slf4j.Slf4j;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Stacks several images vertically into one long image.
 */
@Slf4j
public class ImageUtil {

    /**
     * Appends the images below each other, in list order.
     *
     * <p>The width of the first image is used for the whole result, so all images
     * are expected to share that width: a wider image is cropped to it, and a
     * narrower one makes the read fail, which is logged and reported as {@code null}.
     *
     * @param picList images to stack
     * @return the combined image, or {@code null} when the list is null/empty or combining fails
     */
    private static BufferedImage yPic(List<BufferedImage> picList) {
        if (picList == null || picList.isEmpty()) {
            log.info("图片数组为空!");
            return null;
        }
        try {
            int width = picList.get(0).getWidth();

            // First pass: the canvas height is the sum of all image heights.
            int totalHeight = 0;
            for (BufferedImage pic : picList) {
                totalHeight += pic.getHeight();
            }
            BufferedImage imageResult = new BufferedImage(width, totalHeight, BufferedImage.TYPE_INT_RGB);

            // Second pass: copy each image at the running offset. The offset advances by
            // the height of the image just written, so images of different heights
            // neither overlap nor run past the bottom of the canvas.
            int offsetHeight = 0;
            for (BufferedImage pic : picList) {
                int picHeight = pic.getHeight();
                int[] rgb = pic.getRGB(0, 0, width, picHeight, null, 0, width);
                imageResult.setRGB(0, offsetHeight, width, picHeight, rgb, 0, width);
                offsetHeight += picHeight;
            }
            return imageResult;
        } catch (Exception e) {
            log.error("图片合成失败!", e);
        }
        return null;
    }

    /**
     * Combines the images into one long PNG image.
     * Note: all images must have the same width.
     *
     * @param picList images to combine
     * @return the PNG bytes, or an empty array when there is nothing to combine or encoding fails
     */
    public static byte[] combinationImages2Bytes(List<BufferedImage> picList) {
        BufferedImage image = yPic(picList);
        if (image == null) {
            return new byte[0];
        }
        try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
            // ImageIO.write returns false when no writer for the format is available.
            if (ImageIO.write(image, "png", baos)) {
                return baos.toByteArray();
            }
        } catch (IOException e) {
            log.error("图片组合失败!", e);
        }
        return new byte[0];
    }

    /**
     * Combines the images into one long PNG image and writes it to a file.
     *
     * @param picList images to combine
     * @param outPath path of the PNG file to create
     * @throws IOException              when the file cannot be written or no PNG writer is available
     * @throws NullPointerException     when {@code outPath} is {@code null}
     * @throws IllegalArgumentException when the images could not be combined
     *                                  (null/empty list or a combining failure)
     */
    public static void combinationImages2File(List<BufferedImage> picList, String outPath) throws IOException, NullPointerException {
        BufferedImage image = yPic(picList);
        if (image == null) {
            // Same exception type ImageIO.write raises for a null image, with a usable message.
            throw new IllegalArgumentException("no combined image to write to " + outPath);
        }
        File outFile = new File(outPath);
        if (!ImageIO.write(image, "png", outFile)) {
            throw new IOException("no PNG writer available for " + outPath);
        }
    }
}
示例代码
/**
 * Usage sample: turns a PDF or a Word document into one long PNG snapshot.
 */
public class TestDemo {
    // Absolute directory of the input file (must end with a path separator)
    private static final String filepath = "D:\\***\\";
    // Name of the input file
    private static final String filename = "****.docx";

    /**
     * Converts {@code filepath + filename}: a PDF is rendered directly, anything
     * else is first converted to PDF and then rendered.
     *
     * @param args unused
     * @throws Exception when the Aspose license cannot be applied or file I/O fails
     */
    public static void main(String[] args) throws Exception {
        String sourcePath = filepath + filename;
        if (filename.endsWith(".pdf")) {
            // The PDF itself must be passed here, not just its directory.
            PdfUtil.pdf2multiImageFile(sourcePath, "pdf2png.png", 130);
        } else {
            // Word documents: generate the PDF first, then the snapshot.
            // java.io types are fully qualified because this sample has no import section.
            byte[] pdfBytes;
            try (java.io.InputStream in = new java.io.FileInputStream(sourcePath)) {
                pdfBytes = new AsposeUtil().toPdf(filename, in);
            }
            final byte[] bytes = PdfUtil.pdfBytes2multiImageBytes(pdfBytes, 130);
            // try-with-resources flushes and closes the output file.
            try (java.io.OutputStream fileOutputStream = new java.io.FileOutputStream("word2png.png")) {
                fileOutputStream.write(bytes);
            }
        }
    }
}
运行完这个main方法你就会发现大功告成!