1、下载jar包
spire.pdf-4.10.2.jar
2、读取数据
/**
 * Small demo that walks every page of a PDF, extracts its tables with
 * Spire.PDF's {@code PdfTableExtractor}, and prints each cell to standard output.
 *
 * <p>Usage: {@code java PdfTest [path-to-pdf]}. When no path is supplied the
 * built-in default path is used.
 */
public class PdfTest {

    /** Input file used when no path is passed on the command line. */
    private static final String DEFAULT_PATH = "/Users/macbook/Downloads/894.pdf";

    /**
     * Index of the first row whose cells are collected. Rows before it are still
     * printed cell-by-cell but not collected (presumably header rows - confirm
     * against the actual PDF layout).
     */
    private static final int FIRST_DATA_ROW = 2;

    /** Matches a run of one or more whitespace characters; compiled once and reused. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Opens the PDF, prints every table found on every page, and closes the document.
     *
     * @param args optional; {@code args[0]} is the path of the PDF to read
     * @throws Exception if the document cannot be opened or table extraction fails
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        PdfDocument pdf = new PdfDocument(path);
        try {
            PdfTableExtractor extractor = new PdfTableExtractor(pdf);
            int pageCount = pdf.getPages().getCount();
            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
                PdfTable[] tables = extractor.extractTable(pageIndex);
                if (tables == null) {
                    // The original code guards against null here, so a page
                    // without tables may yield null rather than an empty array.
                    continue;
                }
                for (PdfTable table : tables) {
                    printTable(table);
                }
            }
        } finally {
            // Release the document's resources even when extraction throws.
            pdf.close();
        }
    }

    /**
     * Prints every cell of {@code table} with its row/column index, then, for each
     * row from {@link #FIRST_DATA_ROW} onwards, prints the collected cell values.
     * The first column of those rows has all whitespace removed before collection.
     */
    private static void printTable(PdfTable table) {
        int rowCount = table.getRowCount();
        int columnCount = table.getColumnCount();
        for (int row = 0; row < rowCount; row++) {
            List<String> cells = new ArrayList<String>();
            for (int col = 0; col < columnCount; col++) {
                String text = table.getText(row, col);
                System.out.println(text + "-----行坐标:" + row + "------列坐标:" + col);
                if (row < FIRST_DATA_ROW) {
                    continue;
                }
                if (col == 0) {
                    text = replaceBlank(text);
                    System.out.println("这是处理后的字符串" + text);
                }
                cells.add(text);
            }
            for (String cell : cells) {
                System.out.println("数据内容:" + cell);
            }
        }
    }

    /**
     * Removes every whitespace character (as matched by the regex class {@code \s}:
     * space, tab, line feed, vertical tab, form feed, carriage return) from
     * {@code str}.
     *
     * @param str the text to clean; may be {@code null}
     * @return {@code str} without whitespace, or the empty string when {@code str}
     *         is {@code null}
     */
    public static String replaceBlank(String str) {
        if (str == null) {
            return "";
        }
        return WHITESPACE.matcher(str).replaceAll("");
    }
}