1.引入jar包
<!--
  Apache PDFBox dependency used by the PDF utility class shown below.
  NOTE(review): the code imports org.apache.pdfbox.pdmodel.PDDocument and
  org.apache.pdfbox.text.PDFTextStripper; these are presumably pulled in
  transitively through pdfbox-tools - confirm against the resolved dependency tree.
-->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
<version>2.0.2</version>
</dependency>
2.编写PDF工具类
package com.example.apitest.util;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import java.io.File;
import java.io.IOException;
/**
 * Small helper around Apache PDFBox that reads the text of a PDF file and
 * writes a copy of that file to disk.
 *
 * <p>I/O failures are reported with {@code printStackTrace()} and are not
 * propagated to the caller.
 */
public class PDFUtil {
    /** Path of the PDF file this instance operates on. */
    String filePath = "";

    /**
     * Creates a helper bound to one PDF file.
     *
     * @param filePath path of the PDF file to read
     */
    public PDFUtil(String filePath){
        this.filePath = filePath;
    }

    /**
     * Extracts the text of every page, in page order, and concatenates it.
     *
     * <p>Text is extracted one page at a time with position sorting enabled.
     *
     * @return the concatenated page text; if an {@link IOException} occurs the
     *         stack trace is printed and whatever text was collected before the
     *         failure (possibly the empty string) is returned
     */
    public String parsePDF(){
        // Declared outside the try block so that text gathered before a failure is still returned.
        StringBuilder result = new StringBuilder();
        // try-with-resources guarantees the document is closed even when extraction fails.
        try (PDDocument document = PDDocument.load(new File(filePath))) {
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setSortByPosition(true);
            int pageCount = document.getNumberOfPages();
            // PDFTextStripper page numbers are 1-based and inclusive.
            for (int page = 1; page <= pageCount; page++) {
                stripper.setStartPage(page);
                stripper.setEndPage(page);
                result.append(stripper.getText(document));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return result.toString();
    }

    /**
     * Loads the PDF and saves it, unchanged, to a fixed output path.
     *
     * <p>NOTE: despite its name, this method does not split the document into
     * several files; it writes a single copy of the whole document. The name is
     * kept so existing callers continue to work.
     *
     * <p>If an {@link IOException} occurs the stack trace is printed and the
     * method returns normally.
     */
    public void splitPDF(){
        // try-with-resources closes the document after saving (or on failure).
        try (PDDocument document = PDDocument.load(new File(filePath))) {
            document.save("D:\\testFile\\读取PDF文件COPY.pdf");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
3.调用工具类实现读取功能
/**
 * Demo: reads the text of a sample PDF, writes a copy of the file, and prints
 * the value that follows the "产品" label on its line.
 *
 * <p>If the label is absent (for example because the PDF could not be read and
 * the extracted text is empty) an empty value is printed instead of failing.
 */
private static void testPDF(){
    PDFUtil pdfUtil = new PDFUtil("D:\\testFile\\读取PDF文件.pdf");
    String s = pdfUtil.parsePDF();
    pdfUtil.splitPDF();
    System.out.println("产品:"+valueAfterLabel(s, "产品"));
    // Other labelled values (e.g. "价格", "数量", "质量", "生产率") can be read
    // the same way by passing the label to valueAfterLabel.
}

/**
 * Returns the rest of the line that follows {@code label} in {@code text},
 * skipping the single separator character directly after the label.
 *
 * @param text  text to search
 * @param label label whose value is wanted
 * @return the characters between the separator and the next {@code '\n'} (or
 *         the end of the text); the empty string when the label is not found
 *         or nothing follows it
 */
private static String valueAfterLabel(String text, String label) {
    int labelAt = text.indexOf(label);
    if (labelAt < 0) {
        return "";
    }
    // Skip the label plus one separator character; derived from the label length
    // so labels of any length work.
    int valueStart = labelAt + label.length() + 1;
    if (valueStart > text.length()) {
        return "";
    }
    // indexOf avoids the empty-array case of split("\\n") when only newlines follow.
    int lineEnd = text.indexOf('\n', valueStart);
    return lineEnd < 0 ? text.substring(valueStart) : text.substring(valueStart, lineEnd);
}