1.pom文件
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.12</version>
</dependency>
2.代码
public static String pdfToString(File file) {
String content = null;
InputStream input = null;
PDDocument document = null;
try {
input = new FileInputStream(file);
boolean sort = false;
// 开始提取页数
int startPage = 1;
// 结束提取页数
int endPage = Integer.MAX_VALUE;
document = PDDocument.load(input);
PDFTextStripper pts = new PDFTextStripper();
pts.setSortByPosition(sort);
endPage = document.getNumberOfPages();
//System.out.println("Total Page: " + endPage);
pts.setStartPage(startPage);
pts.setEndPage(endPage);
content = pts.getText(document);
return content;
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (InvalidPasswordException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return "";
}