IOCR财会自定义文字识别

官方开发文档

https://cloud.baidu.com/doc/OCR/s/Ek3h7xypm

模板创建

官网有教程

识别多位小数的数据时,我用的是常规字段类型;如果使用小写数字金额类型,识别时会把最后一位识别为0,或者可能识别不到。

依赖

<!-- https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp -->
 <dependency>
      <groupId>com.squareup.okhttp3</groupId>
      <artifactId>okhttp</artifactId>
      <version>4.12.0</version>
</dependency>
   static final String API_KEY = "填你自己的";

    static final String SECRET_KEY = "填你自己的";

    static final OkHttpClient HTTP_CLIENT = new OkHttpClient().newBuilder().build();


@PostMapping(value = "/testProcess",consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
    public List<HashMap<String,String>> testProcessImage(@RequestParam("file")MultipartFile file) throws IOException {

        byte[] imageBytes = file .getBytes();
        okhttp3.MediaType mediaType = okhttp3.MediaType.parse("application/x-www-form-urlencoded");

//      RequestBody.create 第二个参数根据自己的情况来,查官方文档
//      我这里是根据上传的文件内容(还可以根据文件路径)
        RequestBody body = RequestBody.create(mediaType, 
"image="+URLEncoder.encode(Base64Util.encode(imageBytes), "UTF-8")
                +"&templateSign=ace62e52ce968079a92ec15d2544d1e9");
        Request request = new Request.Builder()
                .url("https://aip.baidubce.com/rest/2.0/solution/v1/iocr/recognise/finance?access_token=" + getAccessToken())
                .method("POST", body)
                .addHeader("Content-Type", "application/x-www-form-urlencoded")
                .addHeader("Accept", "application/json")
                .build();
        Response response = HTTP_CLIENT.newCall(request).execute();

        JSONObject jsonObject = new JSONObject(Objects.requireNonNull(response.body()).string());

//      下面是对返回值的解析
//       这里我只获取识别到的值(word_name,word(具体值))
        JSONArray ret = jsonObject.getJSONObject("data").getJSONArray("ret");

        HashMap<String,String> data = new HashMap<>();
        List<HashMap<String,String>> datas = new ArrayList<>();

        System.out.println(ret.length());
        for (int i = 0; i < ret.length()-1 ; i++) {
            JSONObject phoneNumberObj = ret.getJSONObject(i);
            data.put(phoneNumberObj.getString("word_name"),phoneNumberObj.getString("word"));
            data = new HashMap<>();
            datas.add(data);
        }
        return datas;
    }

下面是返回结果的示例

这是我自己的测试模板实例,可根据具体情况改动

{
  "errorCode": 0,
  "errorMsg": "",
  "logId": null,
  "data": {
    "ret": [
      {
        "probability": {
          "average": 0.9952911139,
          "min": 0.976130724,
          "variance": 0.00005427112774
        },
        "location": {
          "top": 3421,
          "left": 2115,
          "width": 37,
          "height": 150
        },
        "word_name": "valley_price",
        "word": "0.220"
      },
      {
        "probability": {
          "average": 0.9979573488,
          "min": 0.9919402003,
          "variance": 0.000006120805665
        },
        "location": {
          "top": 3418,
          "left": 2179,
          "width": 36,
          "height": 150
        },
        "word_name": "flat_price",
        "word": "0.6320"
      },
      {
        "probability": {
          "average": 0.9959807396,
          "min": 0.9883592129,
          "variance": 0.000009529698218
        },
        "location": {
          "top": 1647,
          "left": 2443,
          "width": 43,
          "height": 207
        },
        "word_name": "account_number",
        "word": "0388*12"
      },
      {
        "probability": {
          "average": 0.9951527715,
          "min": 0.9746937156,
          "variance": 0.00006121824845
        },
        "location": {
          "top": 3638,
          "left": 2175,
          "width": 37,
          "height": 147
        },
        "word_name": "flat_charge",
        "word": "918.38"
      },
      {
        "probability": {
          "average": 0.9968527555,
          "min": 0.9824381471,
          "variance": 0.00003009658758
        },
        "location": {
          "top": 3418,
          "left": 2239,
          "width": 36,
          "height": 150
        },
        "word_name": "peak_price",
        "word": "1.030"
      },
      {
        "probability": {
          "average": 0.9971604347,
          "min": 0.9900932908,
          "variance": 0.00001060384693
        },
        "location": {
          "top": 3674,
          "left": 2112,
          "width": 36,
          "height": 111
        },
        "word_name": "valley_charge",
        "word": "43.1"
      },
      {
        "probability": {
          "average": 0.9989025593,
          "min": 0.9978717566,
          "variance": 4.821478683e-7
        },
        "location": {
          "top": 1640,
          "left": 2506,
          "width": 40,
          "height": 117
        },
        "word_name": "month",
        "word": "202401"
      },
      {
        "probability": {
          "average": 0.9845759273,
          "min": 0.8883020878,
          "variance": 0.001324576442
        },
        "location": {
          "top": 3415,
          "left": 2299,
          "width": 40,
          "height": 143
        },
        "word_name": "tip_price",
        "word": "1.2*0"
      },
      {
        "probability": {
          "average": 0.9963476062,
          "min": 0.9907190204,
          "variance": 0.000006976139503
        },
        "location": {
          "top": 3662,
          "left": 2299,
          "width": 36,
          "height": 113
        },
        "word_name": "tip_charge",
        "word": "67.77"
      },
      {
        "probability": {
          "average": 0.9945835471,
          "min": 0.9784607291,
          "variance": 0.00005564975436
        },
        "location": {
          "top": 3631,
          "left": 2235,
          "width": 37,
          "height": 147
        },
        "word_name": "peak_charge",
        "word": "103.24"
      },
      {
        "probability": {
          "average": 0.999364078,
          "min": 0.9970128536,
          "variance": 7.170726235e-7
        },
        "location": {
          "top": 660,
          "left": 2443,
          "width": 43,
          "height": 553
        },
        "word_name": "username",
        "word": "成都****有限责任公司"
      }
    ],
    "templateSign": "ace62e52ce968079a92ec15d2544d1e9",
    "scores": 1,
    "isStructured": true,
    "logId": "171038434275039",
    "version": 14,
    "clockwiseAngle": 269.9
  }
}

/**
     * 从用户的AK,SK生成鉴权签名(Access Token)
     *
     * @return 鉴权签名(Access Token)
     * @throws IOException IO异常
     */
    private static String getAccessToken() throws IOException {
        okhttp3.MediaType mediaType = okhttp3.MediaType.parse("application/x-www-form-urlencoded");
        RequestBody body = RequestBody.create(mediaType, "grant_type=client_credentials&client_id=" + API_KEY
                + "&client_secret=" + SECRET_KEY);
        Request request = new Request.Builder()
                .url("https://aip.baidubce.com/oauth/2.0/token")
                .method("POST", body)
                .addHeader("Content-Type", "application/x-www-form-urlencoded")
                .build();
        Response response = HTTP_CLIENT.newCall(request).execute();
        return new JSONObject(response.body().string()).getString("access_token");
    }

 /**
     * 获取文件base64编码
     *
     * @param path      文件路径
     * @param urlEncode 如果Content-Type是application/x-www-form-urlencoded时,传true
     * @return base64编码信息,不带文件头
     * @throws IOException IO异常
     */
    static String getFileContentAsBase64(String path, boolean urlEncode) throws IOException {
        byte[] b = Files.readAllBytes(Paths.get(path));
        String base64 = Base64.getEncoder().encodeToString(b);
        if (urlEncode) {
            base64 = URLEncoder.encode(base64, "utf-8");
        }
        return base64;
    }

  • 9
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值