使用 PDFBox 进行 PDF 合并、拆分，以及设置和获取 PDF 文件属性

最新推荐文章于 2023-09-22 15:45:06 发布

randy老师

最新推荐文章于 2023-09-22 15:45:06 发布

阅读量1.5k

点赞数 1

本文链接：https://blog.csdn.net/luyaoran/article/details/113178777

版权

Maven 配置

<!-- Apache PDFBox artifacts declared for the example class that follows. That class imports
     only from org.apache.pdfbox.multipdf and org.apache.pdfbox.pdmodel. -->
<!-- NOTE(review): jempbox is pinned to 1.8.11 while every other artifact is 2.0.22; it looks
     like the older XMP library that xmpbox supersedes. Confirm it is still required. -->
<dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.22</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>fontbox</artifactId>
            <version>2.0.22</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>jempbox</artifactId>
            <version>1.8.11</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>xmpbox</artifactId>
            <version>2.0.22</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>preflight</artifactId>
            <version>2.0.22</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox-tools</artifactId>
            <version>2.0.22</version>
        </dependency>

示例代码


import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.multipdf.Splitter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * PDF helper methods built on Apache PDFBox: split a PDF into single-page files, merge
 * several PDFs into one, and write/read custom entries of the document information.
 */
public class PDFUtils {

    /** File-name suffix every input and output path must carry (compared case-insensitively). */
    private static final String PDF_SUFFIX = ".pdf";

    /**
     * Splits a PDF so that every page in the requested range becomes its own PDF file.
     *
     * <p>Output files are named {@code <newFile without suffix>-<n>.pdf}, where {@code n}
     * counts from 1 for the first page written, regardless of {@code from}.
     *
     * @param pdfFile path of the source PDF; must end in {@code .pdf} (any letter case)
     * @param newFile path used as the naming template for the output files; when null or
     *                empty the source path is used. It must contain a parent directory
     *                that already exists.
     * @param from    1-based first page to write; zero, a negative value, or a value beyond
     *                the last page all mean "start at the first page"
     * @param end     1-based last page to write (inclusive); zero, a negative value, or a
     *                value beyond the last page all mean "through the last page"
     * @return {@code true} if every requested page was written; {@code false} if anything
     *         failed while splitting, including {@code from > end} (both positive), an
     *         unusable output path, or an I/O error; the cause is printed to stderr
     * @throws IllegalArgumentException if {@code pdfFile} is empty or not a {@code .pdf}
     *         path, or {@code newFile} is non-empty and not a {@code .pdf} path
     */
    public static boolean partitionPdfFile(String pdfFile, String newFile, int from, int end) {
        if (ToolUtil.isEmpty(pdfFile)) {
            throw new IllegalArgumentException("pdfFile 不能为空");
        }
        if (!hasPdfSuffix(pdfFile)) {
            throw new IllegalArgumentException("pdfFile 必须为pdf文件");
        }
        if (ToolUtil.isNotEmpty(newFile) && !hasPdfSuffix(newFile)) {
            throw new IllegalArgumentException("newFile 必须为pdf文件");
        }

        PDDocument document = null;
        List<PDDocument> pages = new ArrayList<>();
        try {
            if (from > 0 && end > 0 && from > end) {
                throw new IllegalArgumentException("参数from、end均为正整数时，from不能大于end");
            }

            // Strip the ".pdf" suffix; what remains is the prefix of every output file name.
            String template = ToolUtil.isEmpty(newFile) ? pdfFile : newFile;
            String prefix = template.substring(0, template.length() - PDF_SUFFIX.length());

            // Let java.io.File find the parent so both '/' and '\' separated paths work.
            File parentDir = new File(prefix).getParentFile();
            if (parentDir == null || !parentDir.isDirectory()) {
                throw new IllegalArgumentException("参数newFile:" + prefix + ",格式不正确");
            }

            document = PDDocument.load(new File(pdfFile));
            int total = document.getNumberOfPages();
            if (total == 0) {
                // Nothing to write; also avoids handing the splitter an end page of 0.
                return true;
            }

            // Out-of-range bounds fall back to the first / last page of the document.
            int firstPage = (from <= 0 || from > total) ? 1 : from;
            int lastPage = (end <= 0 || end > total) ? total : end;

            // Restrict the splitter to the requested range instead of splitting every page
            // and discarding most of the results.
            Splitter splitter = new Splitter();
            splitter.setStartPage(firstPage);
            splitter.setEndPage(lastPage);
            pages = splitter.split(document);

            for (int i = 0; i < pages.size(); i++) {
                pages.get(i).save(prefix + "-" + (i + 1) + ".pdf");
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the per-page documents first, then the source they were built from.
            for (PDDocument page : pages) {
                closeQuietly(page);
            }
            closeQuietly(document);
        }
        return false;
    }

    /**
     * Merges several PDF files, in array order, into one new PDF file.
     *
     * @param pdfFiles paths of the PDFs to merge; each must end in {@code .pdf} (any letter
     *                 case) and exist
     * @param newFile  path of the merged output file; must end in {@code .pdf}
     * @return {@code true} if the merged file was written; {@code false} if a source path is
     *         not a PDF path, a source file does not exist, or merging failed; the cause
     *         is printed to stderr
     * @throws IllegalArgumentException if {@code pdfFiles} or {@code newFile} is empty, or
     *         {@code newFile} is not a {@code .pdf} path
     */
    public static boolean mergePdfFile(String[] pdfFiles, String newFile) {
        if (ToolUtil.isEmpty(pdfFiles)) {
            throw new IllegalArgumentException("pdfFiles 不能为空");
        }
        if (ToolUtil.isEmpty(newFile)) {
            throw new IllegalArgumentException("newFile 不能为空");
        }
        if (!hasPdfSuffix(newFile)) {
            throw new IllegalArgumentException("newFile 必须为pdf文件");
        }

        try {
            // Validate every source before touching the merger so a bad entry aborts early.
            List<File> sources = new ArrayList<>();
            for (String path : pdfFiles) {
                if (!hasPdfSuffix(path)) {
                    throw new IllegalArgumentException(path + ",文件格式不是pdf");
                }
                File source = new File(path);
                if (!source.exists()) {
                    throw new IllegalArgumentException(source.getPath() + ",不存在");
                }
                sources.add(source);
            }

            PDFMergerUtility merger = new PDFMergerUtility();
            merger.setDestinationFileName(newFile);
            for (File source : sources) {
                merger.addSource(source);
            }
            merger.mergeDocuments();
            return true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Stores a custom key/value pair in the document information of a PDF and saves the
     * file in place.
     *
     * @param pdfFile path of the PDF to modify; must end in {@code .pdf} (any letter case)
     * @param key     name of the custom metadata entry
     * @param info    value to store under {@code key}
     * @throws IllegalArgumentException if {@code pdfFile} is empty or not a {@code .pdf} path
     * @throws IOException if the file cannot be loaded or saved
     */
    public static void setPdfFileInfo(String pdfFile, String key, String info) throws IOException {
        requirePdfPath(pdfFile);
        // try-with-resources closes the document even when loading succeeds but a later
        // call throws.
        try (PDDocument document = PDDocument.load(new File(pdfFile))) {
            PDDocumentInformation information = document.getDocumentInformation();
            information.setCustomMetadataValue(key, info);
            // NOTE(review): this overwrites the file the document was loaded from while it is
            // still open; confirm that is safe for the PDFBox version in use.
            document.save(pdfFile);
        }
    }

    /**
     * Reads a custom entry from the document information of a PDF.
     *
     * @param pdfFile path of the PDF to read; must end in {@code .pdf} (any letter case)
     * @param key     name of the custom metadata entry
     * @return whatever {@code PDDocumentInformation.getCustomMetadataValue(key)} returns for
     *         this document
     * @throws IllegalArgumentException if {@code pdfFile} is empty or not a {@code .pdf} path
     * @throws IOException if the file cannot be loaded
     */
    public static Object getPdfFileInfo(String pdfFile, String key) throws IOException {
        requirePdfPath(pdfFile);
        try (PDDocument document = PDDocument.load(new File(pdfFile))) {
            return document.getDocumentInformation().getCustomMetadataValue(key);
        }
    }

    /** Rejects an empty path or one that does not end in ".pdf". */
    private static void requirePdfPath(String pdfFile) {
        if (ToolUtil.isEmpty(pdfFile)) {
            throw new IllegalArgumentException("pdfFile 不能为空");
        }
        if (!hasPdfSuffix(pdfFile)) {
            throw new IllegalArgumentException("pdfFile 必须为pdf文件");
        }
    }

    /** Returns whether {@code path} ends in ".pdf", ignoring letter case. */
    private static boolean hasPdfSuffix(String path) {
        int suffixStart = path.length() - PDF_SUFFIX.length();
        return suffixStart >= 0
                && path.regionMatches(true, suffixStart, PDF_SUFFIX, 0, PDF_SUFFIX.length());
    }

    /** Closes a document if present, reporting (not propagating) any failure to close. */
    private static void closeQuietly(PDDocument doc) {
        if (doc == null) {
            return;
        }
        try {
            doc.close();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }

    /**
     * Demo entry point: writes a custom metadata entry to a sample file and prints it back.
     *
     * @param args unused
     * @throws IOException if the sample file cannot be loaded or saved
     */
    public static void main(String[] args) throws IOException {
//        // split a PDF
//        PDFUtils.partitionPdfFile("W:\\测试pdf.pdf","W:\\测试pdf1.pdf",1,3);
//        // merge PDFs
//        String[] files ={"W:\\测试pdf-1.pdf","W:\\测试pdf-2.pdf","W:\\测试pdf-3.pdf","W:\\测试pdf-4.pdf","W:\\测试pdf-5.pdf","W:\\测试pdf-6.pdf","W:\\测试pdf-7.pdf","W:\\测试pdf-8.pdf","W:\\测试pdf-9.pdf"};
//        mergePdfFile(files,"W:\\测试pdf.pdf");
        setPdfFileInfo("W:\\测试pdf.pdf", "gs", "xxxx11");

        System.out.println(getPdfFileInfo("W:\\测试pdf.pdf", "gs"));
    }
}