使用 Apache PDFBox 从 PDF 文件中提取文本,并以 String 的形式返回给前台。
1. 导入 Maven 依赖
<!-- Apache PDFBox: the library used below to parse a PDF and extract its text -->
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.15</version>
</dependency>
<!-- FontBox, declared at the same version as pdfbox. -->
<!-- NOTE(review): pdfbox normally pulls fontbox in transitively; confirm whether this explicit declaration is needed. -->
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/fontbox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>2.0.15</version>
</dependency>
2.实现代码
/**
 * Extracts the text content of a PDF file using PDFBox.
 *
 * <p>The file is read through a {@link FileInputStream}, parsed with {@link PDFParser}, and the
 * text of the resulting {@link PDDocument} is extracted with {@link PDFTextStripper}.
 *
 * @param pdfPath path of the PDF file to read
 * @return the extracted text, or {@code null} if the file could not be opened, parsed or read
 *     (in that case the exception's stack trace is printed)
 */
public static String getDataFromPDF(String pdfPath) {
    String result = null;
    // try-with-resources closes each resource independently, so a failure while closing one
    // of them can no longer skip closing the other (the previous hand-written finally block
    // skipped doc.close() whenever fis.close() threw).
    try (FileInputStream fis = new FileInputStream(pdfPath)) {
        PDFParser parser = new PDFParser(new RandomAccessBuffer(fis));
        parser.parse();
        try (PDDocument doc = parser.getPDDocument()) {
            PDFTextStripper stripper = new PDFTextStripper();
            result = stripper.getText(doc);
        }
    } catch (IOException e) {
        // FileNotFoundException is a subclass of IOException, so this single handler covers
        // both the "file missing" and the "read/parse failed" cases.
        e.printStackTrace();
    }
    return result;
}
/**
 * Demo entry point: runs the PDF text extraction against a fixed sample file.
 * The extracted text is held in a local variable and not used further.
 *
 * @param args command-line arguments (ignored)
 */
public static void main(String[] args) {
    final String extractedText = getDataFromPDF("E:\\demo\\test.pdf");
}