- POM依赖
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.4</version>
</dependency>
- 代码
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.multipdf.Splitter;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineNode;
import org.springframework.util.CollectionUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
/**
 * PDF helpers built on Apache PDFBox: cut a document into one file per bookmark
 * ({@link #startCut}), split a page range into single-page files
 * ({@link #partitionPdfFile}) and merge several PDFs into one ({@link #mergePdfFile}).
 *
 * <p>Not thread-safe: {@link #startCut} keeps its progress in static fields, so only one
 * cut may run at a time.
 */
public class PDFUtil {
    /** Extension every input and output path must carry (compared case-insensitively). */
    private static final String PDF_SUFFIX = ".pdf";

    /** Characters that cannot appear in a file name on common file systems. */
    private static final java.util.regex.Pattern ILLEGAL_FILE_NAME_CHARS =
            java.util.regex.Pattern.compile("[\\\\/:*?\"<>|]");

    /** Source document of the cut currently in progress. */
    private static File file;
    /** Output prefix of the cut currently in progress; section names are appended to it as-is. */
    private static String savePath;
    /** 1-based first page of the section that has not been written yet. */
    private static int lastPage = 1;
    /** File name (without extension) of the section that has not been written yet. */
    private static String lastPageName = "start";

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} source PDF, {@code args[1]} output prefix;
     *             the built-in sample paths are used for any argument that is missing
     */
    public static void main(String[] args) {
        String source = args.length > 0 ? args[0] : "E:\\9800 告警-21.1.pdf";
        String target = args.length > 1 ? args[1] : "E:\\test\\";
        try {
            startCut(source, target);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Cuts a PDF into one file per bookmark. Pages before the first bookmark are saved as
     * {@code start.pdf}; every bookmark that resolves to a page starts a new section that
     * runs up to the page before the next such bookmark, and the last section runs to the
     * end of the document. Sections are named after the second whitespace-separated token
     * of the bookmark title (the whole title when it has only one token). Bookmark titles
     * and page numbers are printed to standard output while walking the outline.
     *
     * @param filePath full path of the source PDF
     * @param savePath output prefix; it is concatenated with the section name without
     *                 inserting a separator, so a directory must end with one
     * @throws Exception if the source document cannot be loaded or its outline cannot be read
     */
    public static void startCut(String filePath, String savePath) throws Exception {
        PDFUtil.file = new File(filePath);
        PDFUtil.savePath = savePath;
        // Reset progress so a second call does not continue from the previous document.
        PDFUtil.lastPage = 1;
        PDFUtil.lastPageName = "start";

        // try-with-resources closes the document (and its underlying file) on every path.
        try (PDDocument doc = PDDocument.load(file)) {
            PDDocumentCatalog catalog = doc.getDocumentCatalog();
            PDDocumentOutline outline = catalog.getDocumentOutline();
            if (outline == null) {
                return;
            }
            printBookmarks(outline, "");
            // A section is only written when the next bookmark is reached, so the section
            // that is still pending after the walk has to be flushed explicitly.
            saveSection(lastPageName, lastPage, doc.getNumberOfPages());
        }
    }

    /**
     * Walks the children of {@code bookmark} depth-first, printing each title and writing
     * the pending section whenever a bookmark with a resolvable page is reached.
     *
     * @param bookmark    outline node whose children are visited
     * @param indentation prefix printed before each title; grows by one space per level
     * @throws IOException if a bookmark destination cannot be read
     */
    private static void printBookmarks(PDOutlineNode bookmark, String indentation) throws IOException {
        for (PDOutlineItem current = bookmark.getFirstChild();
                current != null;
                current = current.getNextSibling()) {
            int page = resolvePageNumber(current);
            String title = current.getTitle();
            if (page == 0) {
                // No page to cut at: the bookmark stays part of the pending section.
                System.out.println(indentation + title);
            } else {
                System.out.println(indentation + title + " " + page);
                saveSection(lastPageName, lastPage, page - 1);
                lastPage = page;
                lastPageName = sectionName(title, page);
            }
            printBookmarks(current, indentation + " ");
        }
    }

    /**
     * Returns the 1-based page a bookmark points to, or 0 when neither its destination nor
     * its go-to action is a page destination with a known page. A go-to action takes
     * precedence over a direct destination when both are present.
     */
    private static int resolvePageNumber(PDOutlineItem item) throws IOException {
        int page = 0;
        Object destination = item.getDestination();
        if (destination instanceof PDPageDestination) {
            page = ((PDPageDestination) destination).retrievePageNumber() + 1;
        }
        if (item.getAction() instanceof PDActionGoTo) {
            Object target = ((PDActionGoTo) item.getAction()).getDestination();
            if (target instanceof PDPageDestination) {
                page = ((PDPageDestination) target).retrievePageNumber() + 1;
            }
        }
        return page;
    }

    /**
     * Derives a file name from a bookmark title: the second whitespace-separated token, or
     * the only token when there is just one, with characters that are illegal in file
     * names replaced by {@code _}. Falls back to {@code page-N} for a null or blank title.
     */
    private static String sectionName(String title, int page) {
        String name = "";
        if (title != null) {
            String[] tokens = title.trim().split("\\s+");
            name = tokens.length > 1 ? tokens[1] : tokens[0];
            name = ILLEGAL_FILE_NAME_CHARS.matcher(name).replaceAll("_");
        }
        return name.isEmpty() ? "page-" + page : name;
    }

    /**
     * Writes pages {@code fromPage..toPage} (1-based, inclusive) of the current source
     * document to {@code savePath + name + ".pdf"}. Empty ranges are skipped; this happens
     * when consecutive bookmarks point to the same page.
     */
    private static void saveSection(String name, int fromPage, int toPage) {
        if (fromPage < 1 || toPage < fromPage) {
            return;
        }
        String target = savePath + name + ".pdf";
        List<String> pageFiles = partitionPdfFile(file.getAbsolutePath(), target, fromPage, toPage);
        if (pageFiles == null || pageFiles.isEmpty()) {
            return;
        }
        try {
            mergePdfFile(pageFiles, target);
        } finally {
            // The single-page files are only intermediates of the merge.
            for (String pageFile : pageFiles) {
                if (!new File(pageFile).delete()) {
                    System.err.println("Could not delete temporary file " + pageFile);
                }
            }
        }
    }

    /** Returns whether {@code path} is non-null and ends with ".pdf" in any letter case. */
    private static boolean hasPdfExtension(String path) {
        int suffixLength = PDF_SUFFIX.length();
        return path != null
                && path.length() >= suffixLength
                && path.regionMatches(true, path.length() - suffixLength, PDF_SUFFIX, 0, suffixLength);
    }

    /** Closes a split-off page document, reporting (not propagating) a close failure. */
    private static void closeQuietly(PDDocument page) {
        try {
            page.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Splits a page range of a PDF into single-page PDF files named
     * {@code <newFile without extension>-<n>.pdf}, with {@code n} counting from 1 within
     * the range.
     *
     * @param pdfFile full path of the source PDF
     * @param newFile full path (directory and file name) used as the naming base for the
     *                page files; when null, the source path is used
     * @param from    1-based first page; 0, a negative value or a value beyond the last
     *                page means the first page
     * @param end     1-based last page (inclusive); 0, a negative value or a value beyond
     *                the last page means the last page
     * @return the paths of the page files in page order, or {@code null} when the split
     *         failed (unreadable source, {@code from > end}, missing output directory,
     *         write error); the cause is printed to standard error
     * @throws RuntimeException if {@code pdfFile} is null, or {@code pdfFile} / a non-null
     *                          {@code newFile} does not end with ".pdf"
     */
    public static List<String> partitionPdfFile(String pdfFile, String newFile, int from, int end) {
        if (Objects.isNull(pdfFile)) {
            throw new RuntimeException("pdfFile 不能为空");
        }
        if (!hasPdfExtension(pdfFile)) {
            throw new RuntimeException("pdfFile 必须为pdf文件");
        }
        if (Objects.nonNull(newFile) && !hasPdfExtension(newFile)) {
            throw new RuntimeException("newFile 必须为pdf文件");
        }
        // Naming base: the target path without its ".pdf" extension.
        String target = Objects.isNull(newFile) ? pdfFile : newFile;
        String basePath = target.substring(0, target.length() - PDF_SUFFIX.length());

        try (PDDocument document = PDDocument.load(new File(pdfFile))) {
            if (end > 0 && from > end) {
                throw new RuntimeException("参数from、end均为正整数时,from不能大于end");
            }
            // The output directory must already exist; both separator styles are accepted.
            int separator = Math.max(basePath.lastIndexOf('\\'), basePath.lastIndexOf('/'));
            if (separator == -1 || !new File(basePath.substring(0, separator + 1)).isDirectory()) {
                throw new RuntimeException("参数newFile:" + basePath + ",格式不正确");
            }

            List<String> names = new ArrayList<>();
            int pageCount = document.getNumberOfPages();
            if (pageCount == 0) {
                return names;
            }
            int firstPage = (from <= 0 || from > pageCount) ? 1 : from;
            int finalPage = (end <= 0 || end > pageCount) ? pageCount : end;

            // Restrict the splitter to the requested range so that no page documents are
            // created (and left unclosed) for pages outside of it.
            Splitter splitter = new Splitter();
            splitter.setStartPage(firstPage);
            splitter.setEndPage(finalPage);
            List<PDDocument> pages = splitter.split(document);
            try {
                for (int i = 0; i < pages.size(); i++) {
                    String fileName = basePath + "-" + (i + 1) + ".pdf";
                    pages.get(i).save(fileName);
                    names.add(fileName);
                }
            } finally {
                // Close every page document, including those after a failed save.
                for (PDDocument page : pages) {
                    closeQuietly(page);
                }
            }
            return names;
        } catch (Exception e) {
            // Best effort by contract: report the cause and signal failure with null.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Merges several PDF files, in list order, into one new PDF file.
     *
     * @param pdfFiles paths of the PDF files to merge
     * @param newFile  full path of the merged file
     * @return {@code true} if the merge completed; {@code false} if a source does not end
     *         with ".pdf", does not exist, or the merge itself failed (the cause is
     *         printed to standard error)
     * @throws RuntimeException if {@code pdfFiles} or {@code newFile} is null, or
     *                          {@code newFile} does not end with ".pdf"
     */
    public static boolean mergePdfFile(List<String> pdfFiles, String newFile) {
        if (Objects.isNull(pdfFiles)) {
            throw new RuntimeException("pdfFiles 不能为空");
        }
        if (Objects.isNull(newFile)) {
            throw new RuntimeException("newFile 不能为空");
        }
        if (!hasPdfExtension(newFile)) {
            throw new RuntimeException("newFile 必须为pdf文件");
        }
        try {
            // Validate every source before handing any of them to the merger.
            List<File> sources = new ArrayList<>();
            for (String path : pdfFiles) {
                if (!hasPdfExtension(path)) {
                    throw new RuntimeException(path + ",文件格式不是pdf");
                }
                File source = new File(path);
                if (!source.exists()) {
                    throw new RuntimeException(source.getPath() + ",不存在");
                }
                sources.add(source);
            }
            PDFMergerUtility merger = new PDFMergerUtility();
            merger.setDestinationFileName(newFile);
            for (File source : sources) {
                merger.addSource(source);
            }
            merger.mergeDocuments();
            return true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }
}
pdfbox JAR包
链接:https://pan.baidu.com/s/1NwY2Hgif5ylFTu68TpGkWg
提取码:t2sc