PDFBox 會把拆分操作產生的各個部分以 PDDocument 類型的對象存放在堆（heap）中，這會導致堆很快被填滿；即使你在循環的每一輪之後都調用 close()，GC 回收堆空間的速度仍然跟不上堆被填充的速度。
一種選擇是把文件的拆分操作分成多個批次來執行，其中每個批次都是一個相對容易管理的塊（10〜40 頁）。以下是 2.0.2 版中的示例代碼：
/**
 * Splits the input PDF in fixed-size batches of pages so that only one
 * batch worth of split documents is produced per call to {@code split}.
 *
 * <p>Page ranges handed to {@code split} are 1-based and inclusive on both
 * ends, and consecutive batches do not overlap: with a batch size of 50 the
 * ranges are 1..50, 51..100, and so on. A trailing partial batch is processed
 * only when the page count is not an exact multiple of the batch size.
 *
 * <p>An {@link IOException} raised while loading or splitting is printed to
 * standard error and not rethrown. The source document is always closed.
 */
public void execute() {
    File inputFile = new File("path/to/the/file.pdf");
    int batchSize = 50;
    // try-with-resources closes the source document even when a batch fails;
    // it is closed only after every batch has been written out.
    try (PDDocument document = PDDocument.load(inputFile)) {
        int totalPages = document.getNumberOfPages();
        int noOfBatches = totalPages / batchSize;
        int finalBatchSize = totalPages % batchSize;
        for (int i = 1; i <= noOfBatches; i++) {
            // Inclusive bounds: batch i covers exactly batchSize pages.
            int start = (i - 1) * batchSize + 1;
            int end = start + batchSize - 1;
            System.out.println("Batch: " + i + " start: " + start + " end: " + end);
            split(document, start, end);
        }
        // handling the remaining pages, if any
        if (finalBatchSize > 0) {
            int start = noOfBatches * batchSize + 1;
            int end = totalPages;
            System.out.println("Final Batch start: " + start + " end: " + end);
            split(document, start, end);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Splits the pages {@code start}..{@code end} of {@code document} into
 * separate documents, saves each one as a PDF file, and closes it.
 *
 * <p>Each output file is named from the source document's title, the index of
 * the part within this call, an underscore, and {@code start}, followed by
 * {@code .pdf}. The underscore keeps names from different batches distinct
 * (without it, index 1 / start 51 and index 15 / start 1 both produce "151").
 *
 * @param document the source document; it is not closed by this method
 * @param start    first page to split, passed to {@code Splitter.setStartPage}
 * @param end      last page to split, passed to {@code Splitter.setEndPage}
 * @throws IOException if splitting or saving a part fails
 */
private void split(PDDocument document, int start, int end) throws IOException {
    Splitter splitter = new Splitter();
    splitter.setStartPage(start);
    splitter.setEndPage(end);
    List<PDDocument> splittedDocuments = splitter.split(document);
    // NOTE(review): the previous version read the "outputPath" setting and
    // created a PDFTextStripper but used neither. The file name is still
    // passed to save() without a directory prefix; confirm whether the files
    // should be written under the configured output path instead.
    String title = document.getDocumentInformation().getTitle();
    int index = 0;
    try {
        for (; index < splittedDocuments.size(); index++) {
            // Close each part right after saving so it is released before
            // the next one is written.
            try (PDDocument splittedDocument = splittedDocuments.get(index)) {
                String pdfFullPath = title + index + "_" + start + ".pdf";
                splittedDocument.save(pdfFullPath);
            }
        }
    } finally {
        // Only reached with parts left over when a save failed: release the
        // parts that were never processed.
        for (int rest = index + 1; rest < splittedDocuments.size(); rest++) {
            try {
                splittedDocuments.get(rest).close();
            } catch (IOException ignored) {
                // Best-effort cleanup; the original failure is what propagates.
            }
        }
    }
}