本文介绍如何使用 iText 读取 PDF 文件中的文本内容。
1、添加maven依赖
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.5.10</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itext-asian</artifactId>
<version>5.2.0</version>
</dependency>
<dependency>
<groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk15on</artifactId>
<version>1.54</version>
</dependency>
2、PDFReader工具类
package com.yao.utils;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;
import java.io.IOException;
/**
 * Reads the text of a PDF file and extracts the bank account number that
 * follows a fixed marker in the contract text.
 */
public class PDFReader {

    /** PDF location used when no path is supplied on the command line. */
    private static final String DEFAULT_PDF_PATH = "D:\\opt\\temp\\111.pdf";

    /** Marker text that immediately precedes the account number. */
    private static final String ACCOUNT_MARKER = "账号为";

    /**
     * Maximum number of characters taken after the marker as the account number.
     * NOTE(review): the original sample comment shows an 11-digit account, but
     * the code has always taken 10 characters - confirm the intended length.
     */
    private static final int ACCOUNT_LENGTH = 10;

    /**
     * Prints the PDF text, the text before the marker, the text after the
     * marker, and finally the extracted account number.
     *
     * @param args optional; {@code args[0]} is the PDF path. When absent, the
     *             built-in default path is used.
     * @throws IOException if the PDF cannot be opened or parsed
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;
        String result = getPdfFileText(path);
        System.out.println(result);

        String[] parts = result.split(ACCOUNT_MARKER);
        // split() yields a single element when the marker is missing, and drops
        // a trailing empty element when the text ends with the marker; in both
        // cases there is nothing after the marker to read.
        if (parts.length < 2) {
            System.err.println("Marker \"" + ACCOUNT_MARKER + "\" not found, or no text follows it, in: " + path);
            return;
        }
        System.out.println(parts[0]);
        System.out.println(parts[1]);

        System.out.println(extractAccount(parts[1]));
    }

    /**
     * Returns the leading characters of the trimmed text that follows the
     * marker, capped at {@link #ACCOUNT_LENGTH} so short input does not throw.
     */
    private static String extractAccount(String afterMarker) {
        String trimmed = afterMarker.trim();
        return trimmed.substring(0, Math.min(ACCOUNT_LENGTH, trimmed.length()));
    }

    /**
     * Extracts the text of every page of a PDF and concatenates it in page order.
     *
     * @param fileName path of the PDF file to read
     * @return the concatenated text of all pages
     * @throws IOException if the file cannot be opened or a page cannot be parsed
     */
    public static String getPdfFileText(String fileName) throws IOException {
        PdfReader reader = new PdfReader(fileName);
        try {
            PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            StringBuilder text = new StringBuilder();
            int pageCount = reader.getNumberOfPages();
            // iText page numbers are 1-based.
            for (int page = 1; page <= pageCount; page++) {
                TextExtractionStrategy strategy =
                        parser.processContent(page, new SimpleTextExtractionStrategy());
                text.append(strategy.getResultantText());
            }
            return text.toString();
        } finally {
            // Release the underlying file handle even when parsing fails.
            reader.close();
        }
    }
}