效果图
合并 a.pdf 和 b.pdf 两个文件时,会自动提取每个待合并 pdf 原有的大纲(书签),并把它们挂到以该文件名命名的一级书签之下。
aspose.pdf 合并pdf (不推荐、非常耗内存)
/**
 * Merges the given PDF files into a single document at {@code targetPath} using Aspose.PDF.
 *
 * <p>For every source file one top-level bookmark is created. Its title is the file name taken
 * from the entry's relative path and it points at the first page that file contributes to the
 * merged document. The bookmarks extracted from the source file are re-created underneath that
 * top-level bookmark via {@code addBookmark}. The merged document is saved after every source file.
 *
 * <p>An existing file at {@code targetPath} is deleted first.
 *
 * @param fileRefers source PDFs to merge, processed in list order
 * @param targetPath path of the merged PDF to create
 */
public static void mergePdfUseRelative(List<ZipUtil.RelativeFile> fileRefers, String targetPath) {
    long startTime = System.currentTimeMillis();
    if (FileUtils.newFile(targetPath).exists()) {
        FileUtils.newFile(targetPath).delete();
    }
    // Write an empty document to targetPath so the merged document can be opened from that
    // path; the placeholder object itself is no longer needed once it has been saved.
    com.aspose.pdf.Document resultPdf = new Document();
    try {
        resultPdf.save(targetPath);
    } finally {
        resultPdf.close();
    }
    Document mergedDocument = new com.aspose.pdf.Document(targetPath);
    try {
        // Document metadata
        DocumentInfo docInfo = new DocumentInfo(mergedDocument);
        docInfo.setAuthor("匿名");
        docInfo.setKeywords("文件");
        docInfo.setProducer("ueh");
        // Number of pages already present in the merged document before the current source file.
        int totalPage = 0;
        int index = 0;
        PageCollection mergedDocumentPages = mergedDocument.getPages();
        for (ZipUtil.RelativeFile sourcePdf : fileRefers) {
            log.info("开始合并{}, 大小:{}MB, 路径:{}", (++index),
                    FileUtils.newFile(sourcePdf.getFilePath()).length() / 1024 / 1024,
                    sourcePdf.getFilePath());
            long time1 = System.currentTimeMillis();
            Document pdf = new com.aspose.pdf.Document(sourcePdf.getFilePath());
            PdfBookmarkEditor bookmarkEditor = new PdfBookmarkEditor();
            try {
                PageCollection pdfPages = pdf.getPages();
                mergedDocumentPages.add(pdfPages);
                // One top-level bookmark per source file.
                OutlineItemCollection pdfOutline = new OutlineItemCollection(mergedDocument.getOutlines());
                pdfOutline.setTitle(FileUtils.newFile(sourcePdf.getRelativePath()).getName());
                pdfOutline.setItalic(true);
                pdfOutline.setBold(true);
                // Page numbers are 1-based here: the first page of this source file is
                // the page right after everything merged so far.
                pdfOutline.setAction(new GoToAction(mergedDocumentPages.get_Item(1 + totalPage)));
                // Read the source file's own bookmarks and hang them under the top-level bookmark.
                bookmarkEditor.bindPdf(sourcePdf.getFilePath());
                Bookmarks bookmarks = bookmarkEditor.extractBookmarks(true);
                addBookmark(mergedDocument, mergedDocumentPages, pdfOutline, totalPage, bookmarks);
                // Register the top-level bookmark in the merged document's outline collection.
                mergedDocument.getOutlines().add(pdfOutline);
                // Advance the page offset for the next source file.
                totalPage += pdfPages.size();
                mergedDocument.save();
                log.info("结束合并,当前合并{}页,总{}页:{},耗费:{}秒", pdfPages.size(), mergedDocumentPages.size(),
                        sourcePdf.getFilePath(), (System.currentTimeMillis() - time1) / 1000);
                pdfPages.clear();
            } finally {
                // Release the bookmark editor and the source document even when merging this file fails.
                try {
                    bookmarkEditor.close();
                } finally {
                    pdf.close();
                }
            }
        }
    } finally {
        mergedDocument.close();
    }
    log.info("合并完成:共耗时{}秒", (System.currentTimeMillis() - startTime) / 1000);
}
/**
 * Recreates the given bookmarks, including all nested levels, as children of a parent outline item.
 *
 * @param mergedDocument      the merged document; its outline collection is passed to every new outline item
 * @param mergedDocumentPages page collection of the merged document, used to resolve destination pages
 * @param pdfOutline          parent outline item that receives the new children
 * @param totalPage           page offset added to every bookmark's own page number
 * @param bookmarks           bookmarks of the current level to copy
 */
public static void addBookmark(Document mergedDocument, PageCollection mergedDocumentPages,
        OutlineItemCollection pdfOutline, int totalPage, Bookmarks bookmarks) {
    int position = 0;
    while (position < bookmarks.size()) {
        Bookmark source = bookmarks.get_Item(position);
        position++;

        // Build the outline entry mirroring this bookmark.
        OutlineItemCollection entry = new OutlineItemCollection(mergedDocument.getOutlines());
        entry.setTitle(source.getTitle());
        entry.setItalic(true);
        entry.setBold(true);
        int targetPage = source.getPageNumber() + totalPage;
        entry.setAction(new GoToAction(mergedDocumentPages.get_Item(targetPage)));

        // Attach it to the parent before descending into its own children.
        pdfOutline.add(entry);

        Bookmarks nested = source.getChildItems();
        if (nested.size() > 0) {
            addBookmark(mergedDocument, mergedDocumentPages, entry, totalPage, nested);
        }
    }
}
pdfbox (推荐,内存耗费较小,合并快)
/**
* 合并Pdf文件
*
* @param files 需要合并的文件路径
* @param destFilePath
*/
public static void mergePdf(List<String> files, String destFilePath) {
log.info("开始合并{}, 大小:{}MB, 路径:{}", 0, 0, 0);
long time1 = System.currentTimeMillis();
// pdf合并工具类
PDFMergerUtility mergePdf = new PDFMergerUtility();
// 自定义书签对象
List<PdfBoxBookmark> allBookList = new ArrayList<>();
int totalPage = 0;
int index = 0;
// 添加待合并文件
for (String item : files) {
log.info("开始添加{}, 大小:{}MB, 路径:{}", (++index),
FileUtils.newFile(item).length() / 1024 / 1024, item);
// 读取Document 方式2
// FileInputStream fis = new FileInputStream(FileUtils.newFile(item))
// PDFParser parser = new PDFParser(new RandomAccessBuffer(fis));
// parser.parse();
// PDDocument doc = parser.getPDDocument();
try (PDDocument document = PDDocument.load(FileUtils.newFile(item), MemoryUsageSetting.setupTempFileOnly());
) {
// 页面大纲(1级)
PdfBoxBookmark boxData = new PdfBoxBookmark(FileUtils.newFile(item).getName(), totalPage, null);
PDDocumentOutline outline = document.getDocumentCatalog().getDocumentOutline();
if (outline != null) {
buildBookMark(outline, boxData, totalPage);
}
/
allBookList.add(boxData);
mergePdf.addSource(item);
// 更新总页码
totalPage += document.getNumberOfPages();
} catch (IOException e) {
throw new BaseException("文件不存在:" + item);
}
log.info("结束添加,添加后,总{}页:{},耗费:{}秒",totalPage,
item, (System.currentTimeMillis() - time1) / 1000);
}
// 设置合并后的pdf文件路径
mergePdf.setDestinationFileName(destFilePath);
// 合并pdf
try {
mergePdf.setDocumentMergeMode(PDFMergerUtility.DocumentMergeMode.OPTIMIZE_RESOURCES_MODE);
mergePdf.mergeDocuments(MemoryUsageSetting.setupTempFileOnly());
} catch (IOException e) {
throw new BaseException("合并发生异常");
}
// 添加书签
try (PDDocument mergedDocument = PDDocument.load(FileUtils.newFile(destFilePath),
MemoryUsageSetting.setupTempFileOnly())) {
// 给合并后的文档设置文档大纲(1级目录)
PDDocumentOutline documentOutline = new PDDocumentOutline();
mergedDocument.getDocumentCatalog().setDocumentOutline(documentOutline);
addBookmark(mergedDocument, allBookList, documentOutline, null);
mergedDocument.save(destFilePath);
} catch (IOException e) {
throw new BaseException("文件不存在:");
}
log.info("结束合并,当前合并{}页,总{}页:{},耗费:{}秒", 0, 0,
0, (System.currentTimeMillis() - time1) / 1000);
}
/**
 * Converts a list of {@code PdfBoxBookmark} nodes into PDFBox outline items, recursing into
 * their children.
 *
 * @param mergedDocument  the merged document, used to look up destination pages by index
 * @param allBookList     bookmark nodes of the current level
 * @param documentOutline the document outline; receives the items when {@code pagesOutline} is null
 * @param pagesOutline    parent outline item for the current level; pass {@code null} for the
 *                        top level
 */
public static void addBookmark(PDDocument mergedDocument, List<PdfBoxBookmark> allBookList,
        PDDocumentOutline documentOutline, PDOutlineItem pagesOutline) {
    for (PdfBoxBookmark node : allBookList) {
        // Destination: the page stored on the node, shown fit-to-width.
        PDPageDestination target = new PDPageFitWidthDestination();
        target.setPage(mergedDocument.getPage(node.getPage()));

        PDOutlineItem item = new PDOutlineItem();
        item.setDestination(target);
        item.setTitle(node.getTitle());

        // Populate the item's own children before the item is attached to its parent.
        List<PdfBoxBookmark> nested = node.getChildren();
        if (nested != null && !nested.isEmpty()) {
            addBookmark(mergedDocument, nested, documentOutline, item);
        }

        if (pagesOutline != null) {
            pagesOutline.addLast(item);
        } else {
            // Top level: the item hangs directly off the document outline.
            documentOutline.addLast(item);
        }
    }
}
/**
 * Copies the children of a PDFBox outline node into a {@code PdfBoxBookmark} tree.
 *
 * <p>Every child of {@code bookmark} becomes a child of {@code boxData}; nested outline levels
 * are copied recursively. Page indexes are shifted by {@code totalPage}. When an item's
 * destination is not a page destination, or its page index cannot be resolved, the item points
 * at {@code totalPage} (the first page of the current source file) instead of an unrelated page.
 *
 * @param bookmark  outline node whose children are copied
 * @param boxData   bookmark node that receives the copied children
 * @param totalPage page offset of the current source file inside the merged document
 * @throws IOException if PDFBox fails to read an outline item's destination
 */
public static void buildBookMark(PDOutlineNode bookmark, PdfBoxBookmark boxData, int totalPage) throws IOException {
    PDOutlineItem current = bookmark.getFirstChild();
    while (current != null) {
        // Fallback target: first page of the source file this outline belongs to.
        int pages = totalPage;
        PDDestination destination = current.getDestination();
        if (destination instanceof PDPageDestination) {
            int pageIndex = ((PDPageDestination) destination).retrievePageNumber();
            // A negative index means the page could not be determined; keep the fallback.
            if (pageIndex >= 0) {
                pages = pageIndex + totalPage;
            }
        }
        List<PdfBoxBookmark> children = boxData.getChildren();
        if (children == null) {
            children = new ArrayList<>();
        }
        PdfBoxBookmark tmpBox = new PdfBoxBookmark(current.getTitle(), pages, null);
        children.add(tmpBox);
        boxData.setChildren(children);
        // Only descend when the item really has children; they belong under tmpBox.
        if (current.hasChildren()) {
            buildBookMark(current, tmpBox, totalPage);
        }
        current = current.getNextSibling();
    }
}