Java使用PDFBox拆分PDF文件
引入pdfbox依赖
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.25</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>jbig2-imageio</artifactId>
<version>3.0.2</version>
</dependency>
1.按起始页、结束页和间隔拆分PDF
/**
* 按起始页、结束页和间隔分隔PDF
* @throws IOException
*/
public static void splitPdfByStartEnd() throws IOException {
/**
* 按起始页和间隔分隔PDF
* @throws IOException
*/
public static void splitPdfByStartEnd() throws IOException {
Splitter splitter = new Splitter();
//2 设置起始页、结束页,每个文件的页数
splitter.setStartPage(1);
splitter.setEndPage(5);
splitter.setSplitAtPage(1);//从1-5页间隔为1进行分隔,分割成5个新的PDF
String sourcePath = "D:\\data\\03-软考下午科目.pdf";
PDDocument source = PDDocument.load(new File(sourcePath));
List<PDDocument> list = splitter.split(source);
int i = 0;
for (PDDocument document : list) {
i++;
String targetFile = "D:\\data\\pdfsplit\\";
if (!new File(targetFile).exists()){
new File(targetFile).mkdirs();
}
document.save(targetFile + i + ".pdf");
}
}
2.仅按起始页和页数对PDF拆分
/**
 * Copies a run of consecutive pages from a PDF into a new PDF file.
 *
 * <p>An {@link IOException} raised while loading or saving is caught and its
 * stack trace is printed; it is not propagated to the caller.
 *
 * @param sourceFile the PDF file to take pages from
 * @param targetFile path of the PDF file to write (passed directly to
 *                   {@code PDDocument.save}), not a directory
 * @param pageIndex  1-based number of the first page to copy
 * @param totalPages number of consecutive pages to copy, starting at {@code pageIndex}
 */
public static void splitPdfWithPageIndex(File sourceFile, String targetFile, Integer pageIndex, Integer totalPages) {
    // try-with-resources closes both documents on every path, including failures.
    // Resources close in reverse order, so the new document is closed before the
    // source document whose pages it references.
    try (PDDocument document = PDDocument.load(sourceFile);
         PDDocument doc = new PDDocument()) {
        for (int index = pageIndex; index < pageIndex + totalPages; index++) {
            // pageIndex is 1-based while getPage expects a 0-based index.
            doc.addPage(document.getPage(index - 1));
        }
        // Save while the source document is still open.
        doc.save(targetFile);
    } catch (IOException e) {
        e.printStackTrace();
    }
}