前言
制作电子印章时,印章的位置坐标由后端计算:只需提供 PDF 页面中的关键字,即可查找到该关键字在 PDF 中所在的页码以及对应的 x、y 坐标。
一、两个基础配置类
1.KeyWordPositionListener pdf签名帮助类
代码如下(示例):
package com.z.boot.core.util.pdf;
import com.itextpdf.awt.geom.Rectangle2D;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;
import java.util.ArrayList;
import java.util.List;
/**
* @ClassName KeyWordPositionListener
* @Description pdf签名帮助类
* @Date: 2021/3/17 16:57
* @Version 1.0
**/
public class KeyWordPositionListener implements RenderListener{
private List matches = new ArrayList();
private List allItems = new ArrayList();
private Rectangle curPageSize;
/**
* 匹配的关键字
*/
private String keyword;
/**
* 匹配的当前页
*/
private Integer pageNumber;
public void beginTextBlock() {
//do nothing
}
public void renderText(TextRenderInfo renderInfo) {
String content = renderInfo.getText();
content = content.replace("<", "").replace("《", "").replace("(", "").replace("(", "").replace("\"", "").replace("'", "")
.replace(">", "").replace("》", "").replace(")", "").replace(")", "").replace("、", "").replace(".", "")
.replace(":", "").replace(":", "").replace(" ", "");
Rectangle2D.Float boundingRectange = renderInfo.getBaseline().getBoundingRectange();
MatchItem item = new MatchItem();
item.setContent(content);
item.setPageNum(pageNumber);
item.setPageWidth(curPageSize.getWidth()); //页面宽度
item.setPageHeight(curPageSize.getHeight()); //页面高度
item.setX(boundingRectange.x);
item.setY(boundingRectange.y);
item.setRectangeWidth(boundingRectange.getWidth()); //当前块级元素占的宽度
item.setRectangeHeight(boundingRectange.getHeight()); //当前块级元素占的高度
if(content!=null && content!=""){
if(content.equalsIgnoreCase(keyword)) {
matches.add(item);
}
}
allItems.add(item);//先保存所有的项
}
public void endTextBlock() {
//do nothing
}
public void renderImage(ImageRenderInfo renderInfo) {
//do nothing
}
/**
* 设置需要匹配的当前页
* @param pageNumber
*/
public void setPageNumber(Integer pageNumber) {
this.pageNumber = pageNumber;
}
/**
* 设置需要匹配的关键字,忽略大小写
* @param keyword
*/
public void setKeyword(String keyword) {
this.keyword = keyword;
}
/**
* 返回匹配的结果列表
* @return
*/
public List getMatches() {
return matches;
}
void setCurPageSize(Rectangle rect) {
this.curPageSize = rect;
}
public List getAllItems() {
return allItems;
}
public void setAllItems(List allItems) {
this.allItems = allItems;
}
}
2.MatchItem
代码如下(示例):
package com.z.boot.core.util.pdf;
/**
* @ClassName MatchItem
* @Description
* @Date: 2021/3/17 16:57
* @Version 1.0
**/
/**
 * Mutable value holder describing one text chunk found in a PDF: the page it
 * is on, its position, the page dimensions, its text, and the size of its
 * bounding rectangle.
 */
public class MatchItem {

    /** Text of the chunk. */
    private String content;

    /** Page number the chunk belongs to. */
    private Integer pageNum;

    /** Horizontal and vertical position of the chunk. */
    private Float x;
    private Float y;

    /** Dimensions of the page the chunk belongs to. */
    private Float pageWidth;
    private Float pageHeight;

    /** Width and height of the chunk's bounding rectangle. */
    private double rectangeWidth;
    private double rectangeHeight;

    /** @return the page number, or {@code null} if not set */
    public Integer getPageNum() {
        return this.pageNum;
    }

    /** @param pageNum the page number to store */
    public void setPageNum(final Integer pageNum) {
        this.pageNum = pageNum;
    }

    /** @return the x position, or {@code null} if not set */
    public Float getX() {
        return this.x;
    }

    /** @param x the x position to store */
    public void setX(final Float x) {
        this.x = x;
    }

    /** @return the y position, or {@code null} if not set */
    public Float getY() {
        return this.y;
    }

    /** @param y the y position to store */
    public void setY(final Float y) {
        this.y = y;
    }

    /** @return the page width, or {@code null} if not set */
    public Float getPageWidth() {
        return this.pageWidth;
    }

    /** @param pageWidth the page width to store */
    public void setPageWidth(final Float pageWidth) {
        this.pageWidth = pageWidth;
    }

    /** @return the page height, or {@code null} if not set */
    public Float getPageHeight() {
        return this.pageHeight;
    }

    /** @param pageHeight the page height to store */
    public void setPageHeight(final Float pageHeight) {
        this.pageHeight = pageHeight;
    }

    /** @return the chunk text, or {@code null} if not set */
    public String getContent() {
        return this.content;
    }

    /** @param content the chunk text to store */
    public void setContent(final String content) {
        this.content = content;
    }

    /** @return the bounding rectangle width ({@code 0.0} if not set) */
    public double getRectangeWidth() {
        return this.rectangeWidth;
    }

    /** @param rectangeWidth the bounding rectangle width to store */
    public void setRectangeWidth(final double rectangeWidth) {
        this.rectangeWidth = rectangeWidth;
    }

    /** @return the bounding rectangle height ({@code 0.0} if not set) */
    public double getRectangeHeight() {
        return this.rectangeHeight;
    }

    /** @param rectangeHeight the bounding rectangle height to store */
    public void setRectangeHeight(final double rectangeHeight) {
        this.rectangeHeight = rectangeHeight;
    }

    /**
     * Renders the page number, position, page dimensions and text; the
     * bounding rectangle size is not included.
     */
    @Override
    public String toString() {
        final StringBuilder out = new StringBuilder("MatchItem [pageNum=");
        out.append(this.pageNum);
        out.append(", x=").append(this.x);
        out.append(", y=").append(this.y);
        out.append(", pageWidth=").append(this.pageWidth);
        out.append(", pageHeight=").append(this.pageHeight);
        out.append(", content=").append(this.content);
        out.append("]");
        return out.toString();
    }
}
二、核心计算类
package com.z.boot.core.util.pdf;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import lombok.extern.slf4j.Slf4j;
import java.io.IOException;
import java.util.List;
/**
* @ClassName QuotationPdf
* @Description
* @Date: 2021/3/16 10:44
* @Version 1.0
**/
@Slf4j
public class QuotationPdf {
/**
* @param filepath
* @param keyWords
* @return float[]
* @Date 2021/3/17 16:57
* @Description 用于供外部类调用获取关键字所在PDF文件坐标
* @Version 1.0
*/
public static MatchItem getKeyWordsByPath(String filepath, String keyWords) throws Exception {
try {
PdfReader pdfReader = new PdfReader(filepath);
int pageCount= pdfReader.getNumberOfPages();
return matchPage(pdfReader,pageCount, keyWords);
} catch (IOException e) {
e.printStackTrace();
return null;
}
}
/**
* 在文件中寻找指定的文字内容
*
* @param reader
* @param pageNumber
* @param keyword
* @return
* @throws Exception
*/
public static MatchItem matchPage(PdfReader reader,
Integer pageNumber, String keyword) throws Exception {
MatchItem matchItem = null;
PdfReaderContentParser parse = new PdfReaderContentParser(reader);
KeyWordPositionListener renderListener = new KeyWordPositionListener();
renderListener.setKeyword(keyword);
int page = 0;
for (page = 1; page <= pageNumber; page++) {
renderListener.setPageNumber(page);
Rectangle rectangle = reader.getPageSize(page);
renderListener.setCurPageSize(rectangle);
parse.processContent(page, renderListener);
matchItem = findKeywordItems(renderListener, keyword);
if(null != matchItem) {
matchItem.setPageNum(page);
break;
}
}
reader.close();
return matchItem;
}
/**
* 找到匹配的关键词块
*
* @param renderListener
* @param keyword
* @return
*/
public static MatchItem findKeywordItems(KeyWordPositionListener renderListener,
String keyword) {
// 先判断本页中是否存在关键词
List allItems = renderListener.getAllItems();// 所有块LIST
StringBuilder sbtemp = new StringBuilder("");
for (int i = 0; i < allItems.size(); i++) {// 将一页中所有的块内容连接起来组成一个字符串。(空格用“正”代替)
sbtemp.append(((MatchItem) allItems.get(i)).getContent());
}
int index = sbtemp.toString().indexOf(keyword);
System.out.println(sbtemp.toString());
if (index == -1) {// 一页组成的字符串没有关键词,直接return
return null;
}
//获取关键词与块内容完全匹配的项
List matches = renderListener.getMatches();
if(null == matches || matches.size()<=0){
//不存在,取本页中连续 拼接块内容 第一个关键字的位置
//当拼接的长度超过或等于关键字在此页内容中出现的位置,即为找到
sbtemp = new StringBuilder("");
for (int i = 0; i < allItems.size(); i++) {
sbtemp.append(((MatchItem) allItems.get(i)).getContent());
if(sbtemp.toString().length() >= index+1){
matches.add(allItems.get(i));
break;
}
}
}
//第二种方式,跟上面是一样的,可以自行研究
// if(null == matches || matches.size()<=0){
// //若还为空,第二种情况:多个块内容拼成一个关键词 取连续的几个块内容拼接起来
// //1.若关键字包含context继续拼接;2.若关键字等于context即找到
// sbtemp = new StringBuffer("");
// int ItmeIndex = 0; //块级元素计数器
// for (int i = 0; i < allItems.size(); i++) {
// String itemText = ((MatchItem) allItems.get(i)).getContent();
// sbtemp.append(itemText);
// if(keyword.contains(sbtemp.toString()) ){
//
// }else if(keyword.equals(sbtemp.toString())){
// matches.add(allItems.get(i));
// break;
// }else if(sbtemp.toString().contains(keyword)){
// //被块元素包含
// int index1 = itemText.indexOf(keyword);
// double rectangeWidth = ((MatchItem) allItems.get(i)).getRectangeWidth(); //块级元素的宽度
// double oneWidth = rectangeWidth/itemText.length(); //单个字体的宽度
// double finalWidth = (index1+Math.rint(keyword.length()/2) ) * oneWidth + ((MatchItem) allItems.get(i)).getX();
// ((MatchItem) allItems.get(i)).setX((float) finalWidth);
// matches.add(allItems.get(i));
// break;
// }else{
// sbtemp = new StringBuffer("");
// }
// }
// }
if(null != matches && matches.size()>0){
return (MatchItem) matches.get(0);
}else{
return null;
}
}
/**
* @param filepath
* @return float[]
* @Date 18:24 2020/3/7
* @Description 用于供外部类获取PDF高度和宽度
*/
public static float[] getWidthAndHeightByPath(String filepath, int page) {
float[] coordinate = null;
try {
PdfReader pdfReader = new PdfReader(filepath);
Rectangle pageSize = pdfReader.getPageSize(page);
float height = pageSize.getHeight();
float width = pageSize.getWidth();
System.out.println("width = " + width + ", height = " + height);
float[] coordinate1 = {width, height};
pdfReader.close();
return coordinate1;
// Document document = new Document(pdfReader.getPageSize(1));
Document document = new Document(PageSize.A4.rotate(), 0, 0, 30, 0);
// // 获取页面宽度
// float widths = document.getPageSize().getWidth();
// // 获取页面高度
// float heights = document.getPageSize().getHeight();
// System.out.println("widths = "+widths+", heights = "+heights);
} catch (IOException e) {
e.printStackTrace();
return coordinate;
}
}
public static void main(String[] args) throws Exception {
String filePath = "E:\\work\\bin\\wenlinworkspace\\repository\\laboratoryFile\\message\\wordtemp\\2020-12-14\\8ae123961e5d47faa4acee6d066efcf7.pdf";
MatchItem a = getKeyWordsByPath(filePath, "bm_g_12443_1#_ResultData_32");
System.out.println("width = " + a.getX()
+ ", height = " + a.getY()
+ ",page = " + a.getPageNum());
//
// float[] b = getWidthAndHeightByPath(filePath, Integer.parseInt(a.get("page").toString()));
System.out.println(a.getPageWidth()+"-----"+a.getPageHeight());
float x = (a.getX() - 35F) / a.getPageWidth();
float y = (a.getY() - 52.5F) / a.getPageHeight();
System.out.println("x = " + x + ", y = " + y);
}
}
三、使用
/**
 * Demo entry point: locates a keyword and prints its position, both absolute
 * and relative to the page size.
 *
 * @param args optional: {@code args[0]} PDF path, {@code args[1]} keyword;
 *             the built-in sample values are used when omitted
 * @throws Exception if the lookup fails while processing the PDF
 */
public static void main(String[] args) throws Exception {
    String filePath = args.length > 0
            ? args[0]
            : "E:\\work\\bin\\wenlinworkspace\\repository\\laboratoryFile\\message\\wordtemp\\2020-12-14\\8ae123961e5d47faa4acee6d066efcf7.pdf";
    String keyword = args.length > 1 ? args[1] : "bm_g_12443_1#_ResultData_32";

    MatchItem a = getKeyWordsByPath(filePath, keyword);
    if (a == null) {
        // The lookup yields null when nothing was found; avoid dereferencing it.
        System.out.println("keyword not found: " + keyword);
        return;
    }
    System.out.println("width = " + a.getX()
            + ", height = " + a.getY()
            + ",page = " + a.getPageNum());
    System.out.println(a.getPageWidth() + "-----" + a.getPageHeight());
    // Fixed offsets applied to the position before dividing by the page size.
    float x = (a.getX() - 35F) / a.getPageWidth();
    float y = (a.getY() - 52.5F) / a.getPageHeight();
    System.out.println("x = " + x + ", y = " + y);
}
// Look up the keyword "合同章_01" in the PDF at pdfPath.
// The result is null when the file cannot be read or the keyword is not found.
MatchItem position = QuotationPdf.getKeyWordsByPath(pdfPath,"合同章_01");
// position carries the values needed afterwards: the x/y position, the
// bounding rectangle size, and the page width and height.
// The position relative to the page size has to be computed by the caller.