使用 Maven 引入 PDFBox 相关依赖：
<!-- Apache PDFBox modules. All 2.x artifacts are pinned to the same release (2.0.1):
     declaring fontbox/xmpbox/etc. at an older version than pdfbox would override the
     version pdfbox pulls in transitively and mix incompatible releases. -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.1</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>fontbox</artifactId>
    <version>2.0.1</version>
</dependency>
<!-- jempbox is the legacy XMP library from the 1.8.x line, so it keeps its own version.
     NOTE(review): xmpbox is its 2.x counterpart; confirm jempbox is still needed. -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>jempbox</artifactId>
    <version>1.8.11</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>xmpbox</artifactId>
    <version>2.0.1</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>preflight</artifactId>
    <version>2.0.1</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox-tools</artifactId>
    <version>2.0.1</version>
</dependency>
/**
 * Reads a PDF file with Apache PDFBox and extracts all of its text.
 *
 * <p>The commented-out code inside the method is an optional example that writes the
 * extracted text into an Excel workbook; adapt or remove it to suit your own needs.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // Location of the PDF document to read
    String path = "C:/Users/Administrator/Desktop/123.pdf";
    java.io.File file = new java.io.File(path);
    // Load the PDF file; try-with-resources guarantees the document is closed
    // even when text extraction fails.
    try (PDDocument document = PDDocument.load(file)) {
        //XSSFWorkbook workbook = new XSSFWorkbook();
        //XSSFSheet sheet =workbook.createSheet();
        // Create the PDFTextStripper used to pull text out of the document
        PDFTextStripper pdfStripper = new PDFTextStripper();
        // Extract the text of the whole document into a single string
        String text = pdfStripper.getText(document);
        // Process the content of text according to your own business requirements
        //String data =text.replaceAll("([\\u4e00-\\u9fa5]|[\\uff0c]|[\\u3001]|[\\u3002])\\s+([\\u4e00-\\u9fa5])","$1$2");
        //String [] str=data.split("\\n");
        /*for(int i=0;i< str.length;i++){
        XSSFRow row=sheet.createRow(i);
        String string1=str[i].replaceAll(" ","");
        String[] str2=string1.split(" ");
        if(str2.length>1){
        for(int j=0;j<str2.length;j++){
        if(str2[j].equals(null)||str2[j].equals("")){
        continue;
        }
        XSSFCell cell=row.createCell(j);
        String data1=str2[j].replaceAll("\\r\\n|\\r|\\n","");
        cell.setCellValue(data1);
        }
        }else {
        XSSFCell cell=row.createCell(0);
        cell.setCellValue(string1);
        }
        }
        FileOutputStream fileOutputStream=new FileOutputStream("C:/Users/Administrator/Desktop/ccs.xlsx");
        workbook.write(fileOutputStream);
        fileOutputStream.close();*/
    } catch (java.io.IOException e) {
        // Report the failure instead of silently swallowing it
        System.err.println("Failed to read PDF '" + path + "': " + e);
    }
}
说明：上面代码中被注释掉的部分演示了如何将 PDF 文件中读取到的文本写入 Excel 表格（需要额外引入 Apache POI 依赖），实际使用时可根据自己的业务需求进行调整。