itextpdf获取关键字所在坐标位置以及添加印章、水印等

一、简介
iText是著名开源站点SourceForge上的一个项目,是用于生成PDF文档的Java类库。通过iText不仅可以生成PDF或RTF文档,还可以将XML、HTML文件转换为PDF文件。

maven依赖导入

<dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itextpdf</artifactId>
            <version>5.5.13</version>
        </dependency>
        <!-- Adds support for Chinese-text watermarks -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itext-asian</artifactId>
            <version>5.2.0</version>
        </dependency>

定义PdfHelper类

package com.example.demo.pdf;

import com.itextpdf.text.Element;
import com.itextpdf.text.Image;
import com.itextpdf.text.pdf.*;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;

import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;

/**
 * Static helpers built on iText 5 for locating a keyword inside a PDF and for
 * stamping an image or a text watermark onto one of its pages.
 *
 * <p>All methods are best-effort: failures are printed via
 * {@link Throwable#printStackTrace()} rather than propagated, and lookups
 * return {@code null} when nothing is found.
 */
public class PdfHelper {

    /**
     * Opens the PDF at {@code filepath} and returns the first text chunk that
     * matches {@code keyWords}.
     *
     * @param filepath path of the PDF file to search
     * @param keyWords keyword to look for
     * @return the matching chunk (content, page number, x, y), or {@code null}
     *         when the keyword is not found, is null/empty, or the file cannot be read
     */
    public static MatchItem getKeyWordsByPath(String filepath, String keyWords) {
        PdfReader pdfReader = null;
        try {
            pdfReader = new PdfReader(filepath);
            return getKeyWords(pdfReader, keyWords);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            // Always release the underlying file handle, even when parsing fails.
            if (pdfReader != null) {
                pdfReader.close();
            }
        }
    }

    /**
     * Scans the document page by page for {@code keyWords}.
     *
     * <p>For each page the lookup first prefers a chunk whose whole text equals
     * the keyword (as collected by {@link CustomRenderListener}); otherwise the
     * page's chunks are concatenated in render order and the chunk that
     * completes the first (case-sensitive) occurrence of the keyword is returned.
     * In that fallback the returned coordinates therefore belong to the chunk
     * in which the keyword ends, not the one in which it starts.
     *
     * @param pdfReader an open reader; it is not closed by this method
     * @param keyWords  keyword to look for
     * @return the matching chunk, or {@code null} when there is no match, the
     *         keyword is null/empty, or a page cannot be parsed
     */
    private static MatchItem getKeyWords(PdfReader pdfReader, String keyWords) {
        // A null keyword would throw inside StringBuilder.indexOf and an empty
        // one would trivially "match" the first chunk; neither is a real hit.
        if (keyWords == null || keyWords.isEmpty()) {
            return null;
        }
        try {
            int pageNum = pdfReader.getNumberOfPages();
            PdfReaderContentParser pdfReaderContentParser = new PdfReaderContentParser(pdfReader);
            for (int page = 1; page <= pageNum; page++) {
                // Use a fresh listener per page: the listener only ever appends
                // to its lists, so reusing it would re-scan earlier pages and
                // could join text across a page boundary into a false match.
                CustomRenderListener renderListener = new CustomRenderListener();
                renderListener.setKeyWord(keyWords);
                renderListener.setPage(page);
                pdfReaderContentParser.processContent(page, renderListener);

                // Exact match: a single chunk is the keyword.
                List<MatchItem> matchItems = renderListener.getMatchItems();
                if (matchItems != null && !matchItems.isEmpty()) {
                    return matchItems.get(0);
                }

                // Fallback: the keyword is split over several consecutive chunks.
                StringBuilder pageText = new StringBuilder();
                for (MatchItem item : renderListener.getAllItems()) {
                    pageText.append(item.getContent());
                    if (pageText.indexOf(keyWords) != -1) {
                        return item;
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Stamps the image {@code D:\1.jpg} under the content of page
     * {@code pageNum} of {@code D:\合同.pdf}, writing the result to
     * {@code D:\合同1.pdf}.
     *
     * @param pageNum 1-based page number to stamp
     * @param x       absolute x position of the image
     * @param y       absolute y position of the image
     */
    public static void andImage(int pageNum, float x, float y) {
        andImage("D:\\合同.pdf", "D:\\合同1.pdf", "D:\\1.jpg", pageNum, x, y);
    }

    /**
     * Stamps an image under the existing content of one page and writes the
     * stamped document to a new file. Errors are printed, not thrown.
     *
     * @param srcPdf    path of the PDF to read
     * @param destPdf   path of the PDF to write
     * @param imagePath path of the image to place
     * @param pageNum   1-based page number to stamp
     * @param x         absolute x position of the image
     * @param y         absolute y position of the image
     */
    public static void andImage(String srcPdf, String destPdf, String imagePath,
                                int pageNum, float x, float y) {
        try {
            PdfReader reader = new PdfReader(srcPdf);
            try {
                // Load the image before the destination is opened so a missing
                // image does not leave behind a truncated output file.
                Image img = Image.getInstance(imagePath);
                img.setAbsolutePosition(x, y);
                try (FileOutputStream out = new FileOutputStream(destPdf)) {
                    PdfStamper stamp = new PdfStamper(reader, out);
                    PdfContentByte under = stamp.getUnderContent(pageNum);
                    under.addImage(img);
                    stamp.close();
                }
            } finally {
                reader.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Draws the text watermark {@code 小龙有限公司} over the content of page
     * {@code pageNum} of {@code D:\合同.pdf}, writing the result to
     * {@code D:\合同1.pdf}.
     *
     * @param pageNum 1-based page number to stamp
     * @param x       x coordinate the text is centred on
     * @param y       y coordinate of the text
     */
    public static void andWatermark(int pageNum, float x, float y) {
        andWatermark("D:\\合同.pdf", "D:\\合同1.pdf", "小龙有限公司", pageNum, x, y);
    }

    /**
     * Draws a text watermark (18pt, 10% fill opacity, rotated 45 degrees) over
     * the existing content of one page and writes the stamped document to a
     * new file. Errors are printed, not thrown.
     *
     * @param srcPdf  path of the PDF to read
     * @param destPdf path of the PDF to write
     * @param text    watermark text
     * @param pageNum 1-based page number to stamp
     * @param x       x coordinate the text is centred on
     * @param y       y coordinate of the text
     */
    public static void andWatermark(String srcPdf, String destPdf, String text,
                                    int pageNum, float x, float y) {
        try {
            PdfReader reader = new PdfReader(srcPdf);
            try {
                // Resolve the font before the destination is opened so a missing
                // font resource does not leave behind a truncated output file.
                BaseFont bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.EMBEDDED);
                PdfGState gs = new PdfGState();
                gs.setFillOpacity(0.1f);
                try (FileOutputStream out = new FileOutputStream(destPdf)) {
                    PdfStamper stamp = new PdfStamper(reader, out);
                    PdfContentByte over = stamp.getOverContent(pageNum);
                    over.beginText();
                    over.setGState(gs);
                    over.setFontAndSize(bf, 18);
                    over.setTextMatrix(1, 1);
                    over.showTextAligned(Element.ALIGN_CENTER, text, x, y, 45);
                    over.endText();
                    stamp.close();
                }
            } finally {
                reader.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: locates a keyword in a sample contract and places a
     * watermark at that position.
     */
    public static void main(String[] args) throws Exception {
        MatchItem matchItem = getKeyWordsByPath("D:\\合同.pdf", "六零");
        if (matchItem == null) {
            // The lookup returns null when the keyword is absent or the file is unreadable.
            System.out.println("未找到关键字");
            return;
        }
        System.out.println("x:" + matchItem.getX() + "y:" + matchItem.getY() + "页数:" + matchItem.getPageNum());
        // To stamp an image instead:
        // andImage(matchItem.getPageNum(), matchItem.getX(), matchItem.getY() + 50);
        andWatermark(matchItem.getPageNum(), matchItem.getX(), matchItem.getY());
    }
}

package com.example.demo.pdf;

import com.itextpdf.awt.geom.Rectangle2D;
import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;

import java.util.ArrayList;
import java.util.List;

/**
 * Render listener that records every text chunk reported by the parser and,
 * separately, the chunks whose text equals the configured keyword
 * (ignoring case).
 *
 * <p>Both lists only grow; nothing in this class clears them.
 */
public class CustomRenderListener implements RenderListener {

    /** Keyword whose position is being located. */
    private String keyWord;
    /** Page number stamped onto every chunk recorded from now on. */
    private int page;
    /** Chunks whose whole text equals the keyword. */
    private List<MatchItem> matchItems = new ArrayList<>();
    /** Every chunk seen, in the order it was rendered. */
    private List<MatchItem> allItems = new ArrayList<>();

    /**
     * Records one text chunk, positioned at the x/y of the bounding rectangle
     * of its baseline, and additionally files it as a match when its text
     * equals the keyword.
     */
    @Override
    public void renderText(TextRenderInfo textRenderInfo) {
        String content = textRenderInfo.getText();
        Rectangle2D.Float box = textRenderInfo.getBaseline().getBoundingRectange();

        MatchItem chunk = new MatchItem();
        chunk.setContent(content);
        chunk.setPageNum(page);
        chunk.setX(box.x);
        chunk.setY(box.y);

        // A null chunk or a single-space chunk is never treated as a keyword hit.
        boolean exactHit = content != null
                && !" ".equals(content)
                && content.equalsIgnoreCase(keyWord);
        if (exactHit) {
            matchItems.add(chunk);
        }
        allItems.add(chunk);
    }

    /** No-op: text block boundaries are not needed. */
    @Override
    public void beginTextBlock() {
    }

    /** No-op: text block boundaries are not needed. */
    @Override
    public void endTextBlock() {
    }

    /** No-op: images are ignored. */
    @Override
    public void renderImage(ImageRenderInfo imageRenderInfo) {
    }

    public String getKeyWord() {
        return keyWord;
    }

    public void setKeyWord(String keyWord) {
        this.keyWord = keyWord;
    }

    public int getPage() {
        return page;
    }

    public void setPage(int page) {
        this.page = page;
    }

    public List<MatchItem> getMatchItems() {
        return matchItems;
    }

    public void setMatchItems(List<MatchItem> matchItems) {
        this.matchItems = matchItems;
    }

    public List<MatchItem> getAllItems() {
        return allItems;
    }

    public void setAllItems(List<MatchItem> allItems) {
        this.allItems = allItems;
    }
}

package com.example.demo.pdf;

/**
 * Mutable value holder describing one piece of text found in a document:
 * its content, the page it was found on, and an x/y position.
 */
public class MatchItem {

    /** Text of the item. */
    private String content;

    /** Page number the item belongs to. */
    private int pageNum;

    /** Horizontal position of the item. */
    private float x;

    /** Vertical position of the item. */
    private float y;

    /** @return the text of the item, or {@code null} if none was set */
    public String getContent() {
        return this.content;
    }

    /** @param content the text of the item */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the page number, {@code 0} if none was set */
    public int getPageNum() {
        return this.pageNum;
    }

    /** @param pageNum the page number */
    public void setPageNum(int pageNum) {
        this.pageNum = pageNum;
    }

    /** @return the horizontal position */
    public float getX() {
        return this.x;
    }

    /** @param x the horizontal position */
    public void setX(float x) {
        this.x = x;
    }

    /** @return the vertical position */
    public float getY() {
        return this.y;
    }

    /** @param y the vertical position */
    public void setY(float y) {
        this.y = y;
    }
}

  • 2
    点赞
  • 22
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
这个问题需要用到 iTextSharp 库来解决。首先,你需要安装该库并导入它。然后,你可以使用以下代码获取指定关键字坐标位置信息:

```c#
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;

namespace PdfKeywordCoordinates
{
    class Program
    {
        static void Main(string[] args)
        {
            string filename = @"C:\example.pdf"; // pdf 文件路径
            string keyword = "example keyword"; // 指定关键字
            using (PdfReader reader = new PdfReader(filename))
            {
                for (int page = 1; page <= reader.NumberOfPages; page++)
                {
                    ITextExtractionStrategy strategy = new LocationTextExtractionStrategy();
                    string currentText = PdfTextExtractor.GetTextFromPage(reader, page, strategy);
                    if (currentText.Contains(keyword))
                    {
                        var kwLocation = new List<RectAndText>();
                        var renderFilter = new RenderFilter[1];
                        renderFilter[0] = new RegionTextRenderFilter(new Rectangle(0, 0, 1000, 1000));
                        var textExtractionStrategy = new FilteredTextRenderListener(new LocationTextExtractionStrategy(), renderFilter);
                        PdfContentStreamProcessor processor = new PdfContentStreamProcessor(textExtractionStrategy);
                        processor.ProcessContent(reader.GetPageContent(page));
                        kwLocation = ((LocationTextExtractionStrategy)textExtractionStrategy).GetLocations();
                        foreach (RectAndText rectAndText in kwLocation)
                        {
                            if (rectAndText.text.Contains(keyword))
                            {
                                Console.WriteLine("Page: " + page + " X: " + rectAndText.rect.Left + " Y: " + rectAndText.rect.Bottom);
                            }
                        }
                    }
                }
            }
            Console.ReadLine();
        }
    }

    public class RectAndText
    {
        public iTextSharp.text.Rectangle rect;
        public String text;
        public RectAndText(iTextSharp.text.Rectangle rect, String text)
        {
            this.rect = rect;
            this.text = text;
        }
    }
}
```

这段代码会在指定的 PDF 文件中查找指定的关键字,并输出该关键字在每一页中的坐标位置信息。注意,这段代码是使用 C# 编写的;iTextSharp 是 iText 的 .NET 移植版本,并没有 Python 版本。如果你使用的是 Java,可以使用 iText 中对应的 API 实现相同的功能;如果使用的是 Python,则需要改用其他 PDF 处理库。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值