使用Tesseract实现验证码图片内容识别

最新推荐文章于 2024-07-24 14:16:00 发布

Dragon_xll

最新推荐文章于 2024-07-24 14:16:00 发布

阅读量502

点赞数

文章标签： java ocr

本文链接：https://blog.csdn.net/Dragon_xll/article/details/135130707

版权

Tesseract 是 OCR 领域最为流行的开源软件之一。本文仅演示使用 Tesseract 识别验证码图片这一应用场景，其他使用场景需要读者自行探索。

简介

本人的学习环境：

操作系统：Mac

java版本：jdk11

（仅作为环境介绍，不一致也无妨）

可应用场景网站示例

测试验证码图片

开发步骤

步骤一：安装Tesseract环境

可去官网查看各个环境的安装教程。本次使用的是 Mac 版本；Windows 版本的安装地址请参考官网的安装说明。

此步骤自行操作

步骤二：添加相关Maven依赖

    <!-- Tesseract 图片识别（Bytedeco JavaCPP Presets，并非 Tess4J） -->
    <dependency>
      <groupId>org.bytedeco.javacpp-presets</groupId>
      <artifactId>tesseract-platform</artifactId>
      <version>4.0.0-1.4.4</version>
    </dependency>

步骤三：java方法代码

/**
     * Runs Tesseract OCR on a captcha image and evaluates the arithmetic
     * expression it contains.
     *
     * <p>The image at {@code savePath + "/" + imageName} is read, recognised
     * with the {@code eng_new} trained data, and the raw text is handed to
     * {@link #ocrExtCodeQdTown(String)} to compute the answer.
     *
     * <p>Native resources (the Tesseract API handle, the Leptonica image and
     * the result buffer) are released in a {@code finally} block so they are
     * freed on every exit path, including initialisation failures and
     * exceptions.
     *
     * @param savePath          directory containing the image
     * @param imageName         file name of the image inside {@code savePath}
     * @param tesseractDataPath path of the installed {@code tessdata} directory
     * @return the computed verification code, or {@code null} when Tesseract
     *         cannot be initialised, the image cannot be read, recognition
     *         fails, or the recognised text is not a supported expression
     * @throws Exception declared for interface compatibility; failures inside
     *                   the OCR pipeline are logged and reported as {@code null}
     */
    public String ocrQdTown(String savePath, String imageName, String tesseractDataPath) throws Exception{
        log.info("OCR识别-提取计算，开始！" );

        tesseract.TessBaseAPI api = new tesseract.TessBaseAPI();
        lept.PIX image = null;
        BytePointer outText = null;
        try {
            // First argument: path of the installed tessdata directory.
            // Second argument: language / trained-data name. "eng" is the default
            // English model and "chi_sim" is Simplified Chinese (must be installed
            // separately); "eng_new" presumably refers to custom trained data that
            // has to exist in the tessdata directory.
            if (api.Init(tesseractDataPath, "eng_new") != 0) {
                log.error("无法初始化tesseract");
                return null;
            }

            // Load the image; pixRead yields null when the file cannot be read.
            String imagePath = savePath+"/"+imageName;
            image = lept.pixRead(imagePath);
            if (image == null) {
                log.error("无法读取图片："+imagePath);
                return null;
            }
            api.SetImage(image);

            // Run recognition and fetch the result as UTF-8 text.
            outText = api.GetUTF8Text();
            if (outText == null) {
                log.error("OCR未返回识别结果："+imagePath);
                return null;
            }

            String ocrCodeStr = outText.getString();
            log.info("OCR解析前："+ocrCodeStr);
            String verificationCode = ocrExtCodeQdTown(ocrCodeStr);
            log.info("OCR解析后："+verificationCode);

            return verificationCode;
        } catch (Exception e) {
            // Keep the original contract (null on failure) but record the cause
            // through the logger instead of printing the stack trace.
            log.error("OCR识别失败", e);
            return null;
        } finally {
            // Release native objects in the same order the original code used.
            api.End();
            api.close();
            if (outText != null) {
                outText.deallocate();
            }
            if (image != null) {
                lept.pixDestroy(image);
            }
        }
    }

验证码提取处理

/**
     * Extracts a single-digit arithmetic expression from raw OCR text and
     * returns its result as a string.
     *
     * <p>Every character other than the digits {@code 0-9} and the symbols
     * {@code +}, {@code -} and {@code .} is discarded. What remains must be
     * exactly three characters of the form {@code digit operator digit}:
     * <ul>
     *   <li>{@code +} adds the two digits;</li>
     *   <li>{@code -} subtracts the second digit from the first;</li>
     *   <li>{@code .} is also treated as subtraction (presumably because the
     *       OCR engine tends to misread a minus sign as a dot).</li>
     * </ul>
     *
     * @param ocrCodeStr raw text produced by the OCR engine; may be {@code null}
     * @return the computed value as a decimal string (may be negative), or
     *         {@code null} when the text does not reduce to a supported
     *         three-character expression
     */
    public String ocrExtCodeQdTown(String ocrCodeStr){
        if (ocrCodeStr == null) {
            return null;
        }

        // The '-' must be escaped inside the character class: an unescaped "+-."
        // is a range that also keeps ',' and the previous pattern additionally
        // kept '(' and ')', which made inputs such as "(1+2)" fail the length check.
        String expression = ocrCodeStr.replaceAll("[^0-9+\\-.]", "");
        if (expression.length() != 3) {
            return null;
        }

        char left = expression.charAt(0);
        char operator = expression.charAt(1);
        char right = expression.charAt(2);
        if (left < '0' || left > '9' || right < '0' || right > '9') {
            return null;
        }

        int leftValue = left - '0';
        int rightValue = right - '0';
        switch (operator) {
            case '+':
                return String.valueOf(leftValue + rightValue);
            case '-':
            case '.':
                return String.valueOf(leftValue - rightValue);
            default:
                // Three digits, i.e. no operator was recognised.
                return null;
        }
    }

步骤四：测试启动调用

 /**
     * Demo entry point: recognises a sample captcha image and prints the
     * computed verification code.
     *
     * <p>Replace the placeholder arguments with the real image directory and
     * the installed {@code tessdata} directory before running.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            String verificationCode = new OcrBiz().ocrQdTown("图片实际路径","imgInit.png","tessdata路径");
            System.out.println(verificationCode);
        } catch (Exception e) {
            // Report the failure instead of silently swallowing it, so a broken
            // setup (wrong paths, missing native libraries) is visible.
            System.err.println("OCR识别调用失败: " + e);
            e.printStackTrace();
        }
    }