packageDemo.qd;importcom.itextpdf.awt.geom.Rectangle2D.Float;importcom.itextpdf.text.pdf.PdfDictionary;importcom.itextpdf.text.pdf.PdfName;importcom.itextpdf.text.pdf.PdfReader;import com.itextpdf.text.pdf.parser.*;importjava.io.File;importjava.io.FileInputStream;importjava.io.IOException;importjava.math.BigDecimal;importjava.util.ArrayList;importjava.util.List;public classPdfPositionTool {public static void main(String[] args) throwsException {
PdfPositionTool pdfPositionTool= newPdfPositionTool();
List positions = pdfPositionTool.getPositions("/Users/yourouniu/Desktop/111.pdf", "%盖章处%");if (positions != null && positions.size() > 0) {for (double[] position : positions) {
System.out.println("pageNum: " + (int) position[0]);
System.out.println("x: " + position[1]);
System.out.println("y: " + position[2]);
}
}
}/***@returnList 坐标数组:float[0]:页码,float[1]:x ,float[2]:y
* @Description 获取关键字坐标
* @Param filePath:pdf 路径
* @Param keyword:关键字*/
public List getPositions(String filePath, String keyword) throwsIOException {
PdfPositionTool pdfPositionTool= newPdfPositionTool();//1.给定文件
File pdfFile = newFile(filePath);//2.定义一个byte数组,长度为文件的长度
byte[] pdfData = new byte[(int) pdfFile.length()];//3.IO流读取文件内容到byte数组
FileInputStream inputStream = null;try{
inputStream= newFileInputStream(pdfFile);
inputStream.read(pdfData);
}catch(IOException e) {throwe;
}finally{if (inputStream != null) {try{
inputStream.close();
}catch(IOException e) {
}
}
}//5.调用方法,给定关键字和文件
List positions =pdfPositionTool.findKeywordPostions(pdfData, keyword);returnpositions;
}/*** @Description pdf 坐标转换为 ofd 坐标,比值为 25.4/72 ,该转换存在误差
* 最好的转换方式为按距离原点的百分比计算*/
private double transForPosition(doublepdfPosition) {double ofdPosition = pdfPosition * 25.4 / 72;returnofdPosition;
}/***@parampdfData 通过IO流 PDF文件转化的byte数组
*@paramkeyword 关键字
*@returnList : float[0]:pageNum float[1]:x float[2]:y
*@throwsIOException*/
public List findKeywordPostions(byte[] pdfData, String keyword) throwsIOException {
List result = new ArrayList<>();
List pdfPageContentPositions =getPdfContentPostionsList(pdfData);for(PdfPageContentPositions pdfPageContentPosition : pdfPageContentPositions) {
List charPositions =findPositions(keyword, pdfPageContentPosition);if (charPositions == null || charPositions.size() < 1) {continue;
}
result.addAll(charPositions);
}returnresult;
}private List getPdfContentPostionsList(byte[] pdfData) throwsIOException {
PdfReader reader= newPdfReader(pdfData);
List result = new ArrayList<>();int pages =reader.getNumberOfPages();for (int pageNum = 1; pageNum <= pages; pageNum++) {float width =reader.getPageSize(pageNum).getWidth();float height =reader.getPageSize(pageNum).getHeight();
PdfRenderListener pdfRenderListener= newPdfRenderListener(pageNum, width, height);//解析pdf,定位位置
PdfContentStreamProcessor processor = newPdfContentStreamProcessor(pdfRenderListener);
PdfDictionary pageDic=reader.getPageN(pageNum);
PdfDictionary resourcesDic=pageDic.getAsDict(PdfName.RESOURCES);try{
processor.processContent(ContentByteUtils.getContentBytesForPage(reader, pageNum), resourcesDic);
}catch(IOException e) {
reader.close();throwe;
}
String content=pdfRenderListener.getContent();
List charPositions =pdfRenderListener.getcharPositions();
List positionsList = new ArrayList<>();for(CharPosition charPosition : charPositions) {double[] positions = new double[]{charPosition.getPageNum(), charPosition.getX(), charPosition.getY()};
positionsList.add(positions);
}
PdfPageContentPositions pdfPageContentPositions= newPdfPageContentPositions();
pdfPageContentPositions.setContent(content);
pdfPageContentPositions.setPostions(positionsList);
result.add(pdfPageContentPositions);
}
reader.close();returnresult;
}private static ListfindPositions(String keyword, PdfPageContentPositions pdfPageContentPositions) {
List result = new ArrayList<>();
String content=pdfPageContentPositions.getContent();
List charPositions =pdfPageContentPositions.getPositions();for (int pos = 0; pos
}double[] postions =charPositions.get(positionIndex);
result.add(postions);
pos= positionIndex + 1;
}returnresult;
}private classPdfPageContentPositions {privateString content;private Listpositions;publicString getContent() {returncontent;
}public voidsetContent(String content) {this.content =content;
}public ListgetPositions() {returnpositions;
}public void setPostions(Listpositions) {this.positions =positions;
}
}private class PdfRenderListener implementsRenderListener {private intpageNum;private floatpageWidth;private floatpageHeight;private StringBuilder contentBuilder = newStringBuilder();private List charPositions = new ArrayList<>();public PdfRenderListener(int pageNum, float pageWidth, floatpageHeight) {this.pageNum =pageNum;this.pageWidth =pageWidth;this.pageHeight =pageHeight;
}public voidbeginTextBlock() {
}/*** @Description 计算转换后的 ofd 坐标值
* 如有需要,可转为计算距离原点的百分比值。在知道 ofd 长宽的情况下,用百分比重新计算坐标更精确*/
public voidrenderText(TextRenderInfo renderInfo) {
List characterRenderInfos =renderInfo.getCharacterRenderInfos();for(TextRenderInfo textRenderInfo : characterRenderInfos) {
String word=textRenderInfo.getText();if (word.length() > 1) {
word= word.substring(word.length() - 1, word.length());
}
Float rectangle=textRenderInfo.getAscentLine().getBoundingRectange();float x = (float) rectangle.getX();float y = (float) rectangle.getY();//这两个是关键字在所在页面的XY轴的百分比
float xPercent = Math.round(x / pageWidth * 10000) /10000f;//pdf 原点在左下,ofd 原点在左上
float yPercent = Math.round((1 - y / pageHeight) * 10000) /10000f;
CharPosition charPosition= newCharPosition(pageNum, transForPosition(x),
transForPosition((yPercent)*pageHeight));
charPositions.add(charPosition);
contentBuilder.append(word);
}
}public voidendTextBlock() {
}public voidrenderImage(ImageRenderInfo renderInfo) {
}publicString getContent() {returncontentBuilder.toString();
}public ListgetcharPositions() {returncharPositions;
}
}private classCharPosition {private int pageNum = 0;private double x = 0;private double y = 0;public CharPosition(int pageNum, double x, doubley) {this.pageNum =pageNum;this.x =x;this.y =y;
}public intgetPageNum() {returnpageNum;
}public doublegetX() {returnx;
}public doublegetY() {returny;
}
@OverridepublicString toString() {return "[pageNum=" + this.pageNum + ",x=" + this.x + ",y=" + this.y + "]";
}
}
}