通过 Maven 仓库安装 Spire.PDF for Java 工具包,实现 PDF 文件的格式转换。
在pom.xml文件中配置Maven仓库路径。
<repositories>
<repository>
<id>com.e-iceblue</id>
<url>https://repo.e-iceblue.cn/repository/maven-public/</url>
</repository>
</repositories>
然后,在pom.xml文件中指定Spire.PDF for Java的Maven依赖。
<dependencies>
<dependency>
<groupId>e-iceblue</groupId>
<artifactId>spire.pdf</artifactId>
<version>3.11.6</version>
</dependency>
</dependencies>
或者,如果使用免费版 Spire.PDF for Java,则改为指定以下依赖(两个依赖二选一,不要同时配置)。
<dependencies>
<dependency>
<groupId>e-iceblue</groupId>
<artifactId>spire.pdf.free</artifactId>
<version>3.9.0</version>
</dependency>
</dependencies>
package com.ymh.pdf;
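/**
* @ClassName PdfToWord
* @Description: PDF conversion utility (Word / HTML / SVG / XPS / image / PDF-A export and text extraction) built on Spire.PDF for Java
* @Author yangminghao
* @Date 2022/2/8
**/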
import com.spire.pdf.*;
import com.spire.pdf.graphics.PdfMargins;
import lombok.extern.slf4j.Slf4j;
import javax.imageio.ImageIO;
import java.awt.geom.Dimension2D;
import java.awt.image.BufferedImage;
import java.io.*;
//import com.spire.doc.Document;
//import com.spire.doc.FileFormat;
/**
* 把pdf转换为
*
* @author Angin
* @date 2019/3/18 0018.
* https://www.cnblogs.com/Yesi/tag/Spire.PDF%20for%20Java/
*/
@Slf4j
public class PdfToWord {
public static void main(String[] args) {
//PDFtoimage("F:\\cs\\imga.pdf","png");
//convertPdfToWord("F:\\cs\\33.pdf");
//convertPdfToAll("svgs","F:\\cs\\33.pdf");
PdfToWord pdf = new PdfToWord();
pdf.PDFtoPDFA("F:\\cs\\test.pdf");
}
/**
* pdf 转
* @param cmd
* @param pdfPath
*/
public void convertPdfToAll(String cmd,String pdfPath) {
cmd = cmd.toLowerCase();
log.info("===============PDF 转 "+cmd+" 工具===========");
log.info("pdf原文件地址:"+pdfPath);
log.info("pdf >> "+cmd+" 转换开始》》》》》,大文件转换需要点时间,请耐心等待.............");
PdfDocument pdf = new PdfDocument();
pdf.loadFromFile(pdfPath);
String pdfWordPath = pdfPath.substring(0, pdfPath.indexOf(".")) + ".";
switch(cmd){
case "word" :
pdfWordPath = pdfWordPath + "docx";
pdf.saveToFile(pdfWordPath, FileFormat.DOCX);
break; //可选
case "html" :
pdfWordPath = pdfWordPath + "html";
pdf.saveToFile(pdfWordPath, FileFormat.HTML);
break;
case "svg" ://转为单个svg
pdfWordPath = pdfWordPath + "svg";
pdf.saveToFile(pdfWordPath, FileFormat.SVG);
break;
case "svgs" ://多页pdf转为一个svg
pdfWordPath = pdfWordPath + "svg";
pdf.getConvertOptions().setOutputToOneSvg(true);
pdf.saveToFile(pdfWordPath, FileFormat.SVG);
break;
case "xps" ://多页pdf转为一个svg
pdfWordPath = pdfWordPath + "xps";
pdf.saveToFile(pdfWordPath, FileFormat.XPS);
break;
// case "ofd" ://多页pdf转为一个svg
// pdfWordPath = pdfWordPath + "ofd";
// log.info("转换后文件地址:"+pdfWordPath);
// pdf.saveToFile(pdfWordPath, FileFormat.OFD);
// break;
default : //可选
log.error("不支持的文件格式》》》"+cmd);
}
log.info("转换后文件地址:"+pdfWordPath);
pdf.close();
log.info("==================转换结束 end================");
}
/**
* pdf >> word转换
*/
public void convertPdfToWord(String pdfPath) {
log.info("===============PDF 转 word 工具===========");
log.info("pdf原文件地址:"+pdfPath);
String pdfWordPath = pdfPath.substring(0, pdfPath.indexOf(".")) + ".doc";
log.info("pdf >> word转换开始》》》》》,大文件转换需要点时间,请耐心等待.............");
PdfDocument pdf = new PdfDocument();
pdf.loadFromFile(pdfPath);
pdf.saveToFile(pdfWordPath, FileFormat.DOCX);
log.info("转换word后文件地址:"+pdfWordPath);
pdf.close();
log.info("==================转换结束 end================");
}
/**
* pdf >> image
* @param pdfPath
*/
public void PDFtoimage(String pdfPath,String formatName){
log.info("===============PDF 转 image 工具===========");
log.info("pdf原文件地址:"+pdfPath);
log.info(" pdf >> image 支持的图片格式包括Jpeg, Jpg, Png, Bmp, Tiff, Gif, EMF等");
log.info("pdf >> image转换开始》》》》》,大文件转换需要点时间,请耐心等待.............");
PdfDocument pdf = new PdfDocument(pdfPath);
BufferedImage image;
for(int i = 0; i< pdf.getPages().getCount();i++){
image = pdf.saveAsImage(i);
// File file = new File( String.format("ToImage-img-%d.png", i));
String pdfWordPath = pdfPath.substring(0, pdfPath.indexOf("."));
pdfWordPath = String.format(pdfWordPath+"-img-%d."+formatName.toLowerCase(), i);
log.info("转换image后文件地址:"+pdfWordPath);
File file = new File( pdfWordPath);
try {
// ImageIO.write(image, "PNG", file);
ImageIO.write(image, formatName.toUpperCase(), file);
} catch (IOException e) {
log.error("转换出错了。。。。。"+e.getMessage());
e.printStackTrace();
}
}
pdf.close();
log.info("==================转换结束 end================");
}
/**
* pdf >>> html
* @param pdfPath
*/
public void pdfToHtml(String pdfPath) {
log.info("===============PDF 转 html 工具===========");
log.info("pdf原文件地址:"+pdfPath);
String pdfWordPath = pdfPath.substring(0, pdfPath.indexOf(".")) + ".html";
log.info("转换 html 后文件地址:"+pdfWordPath);
log.info("pdf >> html 转换开始》》》》》,大文件转换需要点时间,请耐心等待.............");
PdfDocument pdf = new PdfDocument();
pdf.loadFromFile(pdfPath);
pdf.saveToFile(pdfWordPath, FileFormat.HTML);
pdf.close();
log.info("==================转换结束 end================");
}
/**
* 读取pdf文件数据
* @param pdfPath
*/
public void ExtractText (String pdfPath) {
log.info("===============读取pdf文件数据===========");
log.info("pdf原文件地址:"+pdfPath);
String pdfWordPath = pdfPath.substring(0, pdfPath.indexOf(".")) + ".";
log.info("读取pdf文件数据开始》》》》》,大文件转换需要点时间,请耐心等待.............");
//加载测试文档
PdfDocument pdf = new PdfDocument(pdfPath);
//实例化StringBuilder类
StringBuilder sb = new StringBuilder();
//定义一个int型变量
int index = 0;
//遍历PDF文档中每页
PdfPageBase page;
for (int i= 0; i<pdf.getPages().getCount();i++) {
page = pdf.getPages().get(i);
//调用extractText()方法提取文本
sb.append(page.extractText(true));
FileWriter writer;
try {
//将StringBuilder对象中的文本写入到txt
writer = new FileWriter(pdfWordPath+"_ExtractText.txt");
writer.write(sb.toString());
writer.flush();
} catch (IOException e) {
e.printStackTrace();
}
//调用extractImages方法获取图片
for (BufferedImage image : page.extractImages()) {
//指定输出图片名,指定图片格式
File output = new File(String.format(pdfWordPath+"_Image_%d.png", index++));
try {
ImageIO.write(image, "PNG", output);
} catch (IOException e) {
e.printStackTrace();
}
}
}
log.info("读取pdf文件数据地址:"+pdfWordPath);
pdf.close();
log.info("==================转换结束 end================");
}
/**
* PDF转PDF/A
* @param pdfPath
*/
public void PDFtoPDFA(String pdfPath) {
log.info("===============PDF 转 Pdf_A_1_B 工具===========");
log.info("pdf原文件地址:"+pdfPath);
String pdfWordPath = pdfPath.substring(0, pdfPath.indexOf(".")) + "_Pdf_A_1_B.pdf";
log.info("pdf >> Pdf_A_1_B 转换开始》》》》》,大文件转换需要点时间,请耐心等待.............");
//加载测试文档
PdfDocument pdf = new PdfDocument();
pdf.loadFromFile(pdfPath);
//转换为Pdf_A_1_B格式
PdfNewDocument newDoc = new PdfNewDocument();
newDoc.setConformance(PdfConformanceLevel.Pdf_A_1_B);
PdfPageBase page;
for ( int i=0;i< pdf.getPages().getCount();i++) {
page = pdf.getPages().get(i);
Dimension2D size = page.getSize();
PdfPageBase p = newDoc.getPages().add(size, new PdfMargins(0));
page.createTemplate().draw(p, 0, 0);
}
//保存结果文件
newDoc.save(pdfWordPath);
log.info("转换 Pdf_A_1_B 后文件地址:"+pdfWordPath);
newDoc.close();
log.info("==================转换结束 end================");
}
}