iText 7 获取关键字在 PDF 文件中的坐标

目录

        1. maven配置

        2. 实体类

        3. java代码


1. maven配置

<!-- Hutool: the example code below imports CollectionUtil and JSONUtil from it -->
<dependency>
	<groupId>cn.hutool</groupId>
	<artifactId>hutool-all</artifactId>
	<version>5.8.11</version>
</dependency>
<!-- iText 7 core, declared with type "pom" -->
<dependency>
	<groupId>com.itextpdf</groupId>
	<artifactId>itext7-core</artifactId>
	<version>7.2.0</version>
	<type>pom</type>
</dependency>

2. 实体类

package com.example.demo.itext.model;

import lombok.Data;

import java.io.Serializable;

@Data
public class KeyWordBean implements Comparable<KeyWordBean>, Serializable {

    private float x;
    private float y;
    private float width;
    private float height;
    // pdf的页面
    private int page;
    // 当前页面中第几个
    private int num;
    private String text;

    @Override
    public int compareTo(KeyWordBean o) {
        // 先按照Y轴排序
        int i = (int) (o.getY() - this.getY());
        if (i == 0) {
            // 如果Y轴相等了再按X轴进行排序
            return (int) (this.x - o.getX());
        }
        return i;
    }
}

3. java代码

package com.example.demo.itext.util;

import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.json.JSONUtil;
import com.example.demo.itext.model.KeyWordBean;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.canvas.parser.PdfCanvasProcessor;
import com.itextpdf.kernel.pdf.canvas.parser.listener.IPdfTextLocation;
import com.itextpdf.kernel.pdf.canvas.parser.listener.RegexBasedLocationExtractionStrategy;

import java.io.IOException;
import java.util.*;

/**
 * Helper for locating keyword occurrences in a PDF file with iText 7.
 */
public class ItextPDFUtil {

    /**
     * Demo entry point: searches a hard-coded PDF for a hard-coded keyword and prints the
     * result as JSON.
     *
     * @param args unused
     * @throws IOException declared for compatibility with existing callers
     */
    public static void main(String[] args) throws IOException {
        String path = "F:\\software\\myfile\\txt12_加水印.pdf";
        System.out.println("关键字在PDF文件中的文字信息:" + JSONUtil.toJsonStr(keyWordLocationMap("负责人签名:", path)));
    }

    /**
     * Finds every occurrence of a keyword in a PDF and returns its coordinates, grouped by page.
     *
     * <p>Original author: lhp, 2023/1/29.
     *
     * @param KEY_WORD keyword to search for; it is handed to
     *                 {@link RegexBasedLocationExtractionStrategy}, so it is interpreted as a
     *                 regular expression (escape special characters for a literal match)
     * @param input    path of the PDF file
     * @return map from 1-based page number to the matches on that page; pages without a
     *         match have no entry
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be read
     */
    public static Map<Integer, List<KeyWordBean>> keyWordLocationMap(String KEY_WORD, String input) {
        // try-with-resources closes the reader even when opening the document fails,
        // and closes the document exactly once on every path.
        try (PdfReader reader = new PdfReader(input);
             PdfDocument pdfDocument = new PdfDocument(reader)) {
            int pageNumbers = pdfDocument.getNumberOfPages();
            Map<Integer, List<KeyWordBean>> listMap = new HashMap<>(pageNumbers);
            for (int i = 1; i <= pageNumbers; i++) {
                PdfPage page = pdfDocument.getPage(i);
                // A fresh strategy per page keeps the matches of different pages apart.
                RegexBasedLocationExtractionStrategy strategy = new RegexBasedLocationExtractionStrategy(KEY_WORD);
                new PdfCanvasProcessor(strategy).processPageContent(page);
                Collection<IPdfTextLocation> resultantLocations = strategy.getResultantLocations();
                if (resultantLocations.isEmpty()) {
                    continue;
                }

                List<KeyWordBean> keyWordBeanList = new ArrayList<>(resultantLocations.size());
                int num = 0;
                for (IPdfTextLocation item : resultantLocations) {
                    num++;
                    KeyWordBean keyWordBean = toKeyWordBean(item, i, num);
                    System.out.println("关键字“" + KEY_WORD + "” 的坐标为 x: " + keyWordBean.getX() + "  ,y: " + keyWordBean.getY());
                    keyWordBeanList.add(keyWordBean);
                }
                listMap.put(i, keyWordBeanList);
            }
            return listMap;
        } catch (IOException e) {
            throw new RuntimeException("Failed to read PDF: " + input, e);
        }
    }

    /**
     * Copies one iText text location into a {@link KeyWordBean}.
     *
     * @param location match reported by the extraction strategy
     * @param page     1-based page the match was found on
     * @param num      1-based ordinal of the match within that page
     */
    private static KeyWordBean toKeyWordBean(IPdfTextLocation location, int page, int num) {
        Rectangle boundRectangle = location.getRectangle();
        KeyWordBean keyWordBean = new KeyWordBean();
        // The page comes from the caller's loop index: the strategy is never told which page
        // it is processing, so location.getPageNumber() cannot be relied on
        // (NOTE(review): iText 7.2.x appears to report 0 there — confirm).
        keyWordBean.setPage(page);
        keyWordBean.setX(boundRectangle.getX());
        keyWordBean.setY(boundRectangle.getY());
        keyWordBean.setWidth(boundRectangle.getWidth());
        keyWordBean.setHeight(boundRectangle.getHeight());
        keyWordBean.setText(location.getText());
        keyWordBean.setNum(num);
        return keyWordBean;
    }

}

  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值