使用阿里云表格识别 OCR（将图片中的表格转换成 Excel）

最新推荐文章于 2025-01-20 09:49:15 发布

Basic_XYZ

最新推荐文章于 2025-01-20 09:49:15 发布

阅读量4.8k

点赞数 1

分类专栏：第三方工具的使用文章标签： java 阿里巴巴 alibaba

本文链接：https://blog.csdn.net/qq_37126480/article/details/111581488

版权

第三方工具的使用同时被 2 个专栏收录

4 篇文章

订阅专栏

微服务

4 篇文章

订阅专栏

文字的ocr识别

图片识别成表格、表格识别、OCR
使用阿里云api
购买（印刷文字识别-表格识别/OCR文字识别） https://market.aliyun.com/products/57124001/cmapi024968.html
获得阿里云图片识别表格的appcode

excel格式要注意：

转换结果可以选择 html、json、xlsx（Excel）三种格式。如果选择 xlsx，接口返回的是 Base64 编码的文件内容，需要按照整合代码末尾的方式先做 Base64 解码，再把解码后的字节写入 .xlsx 文件。

官方效果如下：

返回的html页面的样式

整合代码如下：

package com.guojiangcloud.controller.alibabaOcrdemo;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import org.apache.http.HttpResponse;
import org.apache.http.util.EntityUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import static org.apache.commons.codec.binary.Base64.encodeBase64;
import static sun.plugin2.util.SystemUtil.decodeBase64;


/**
 * Demo that calls the Alibaba Cloud Market table-OCR service with APPCODE
 * authentication and saves the recognised table as an {@code .xlsx} file.
 *
 * <p>Things to change before running {@link #main(String[])}:
 * <ol>
 *   <li>the path of the image to recognise;</li>
 *   <li>the directory the resulting spreadsheet is written to;</li>
 *   <li>your own Alibaba Cloud APPCODE;</li>
 *   <li>{@code HttpUtils} must be downloaded from
 *       https://github.com/aliyun/api-gateway-demo-sign-java/blob/master/src/main/java/com/aliyun/api/gateway/demo/util/HttpUtils.java
 *       (its dependencies are listed in
 *       https://github.com/aliyun/api-gateway-demo-sign-java/blob/master/pom.xml).</li>
 * </ol>
 */
public class APPCodeDemo {

    /**
     * Builds the {@code {"dataType": ..., "dataValue": ...}} wrapper object used by
     * the old ("inputs") request format.
     *
     * @param type      value stored under {@code dataType} (this demo always passes 50;
     *                  NOTE(review): the meaning of that code is defined by the service, confirm in its docs)
     * @param dataValue value stored under {@code dataValue}
     * @return the wrapper object; if a {@link JSONException} occurs it is printed and
     *         the object is returned with whatever was stored so far
     */
    public static JSONObject getParam(int type, String dataValue) {
        JSONObject obj = new JSONObject();
        try {
            obj.put("dataType", type);
            obj.put("dataValue", dataValue);
        } catch (JSONException e) {
            e.printStackTrace();
        }
        return obj;
    }

    /**
     * Reads the image, sends it to the OCR endpoint and writes the returned
     * spreadsheet to {@code <saveExcel>/<currentTimeMillis>.xlsx}.
     *
     * <p>Failures are reported on standard output / as a stack trace and end the
     * run; nothing is thrown to the caller.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Image to recognise, output directory, and the Alibaba APPCODE.
        String imgFile = "F:\\test.jpg";
        String saveExcel = "F:\\ocr\\"; // output files are named <timestamp>.xlsx
        String appcode = "阿里的appCode码";  // Alibaba APPCODE

        // Alibaba API endpoint.
        String host = "https://form.market.alicloudapi.com";
        String path = "/api/predict/ocr_table_parse";
        // Set to true if the documented request body contains an "inputs" field, false otherwise.
        boolean oldFormat = false;

        // Adjust the "configure" fields according to the online API documentation.
        JSONObject configObj = new JSONObject();
        configObj.put("format", "xlsx"); // output format: html/json/xlsx
        configObj.put("finance", false);
        // Whether the image orientation is known to be upright: true (certain) / false (not sure).
        // The original comment here also described a "no ruling lines" flag, but no such key is set.
        configObj.put("dir_assure", false);
        String configStr = configObj.toString();

        String method = "POST";
        Map<String, String> headers = new HashMap<String, String>();
        // Final header format (single ASCII space in between):
        // Authorization:APPCODE 83359fd73fe94948385f570e3c139105
        headers.put("Authorization", "APPCODE " + appcode);
        Map<String, String> querys = new HashMap<String, String>();

        // Base64-encode the image.
        String imgBase64;
        try {
            // readAllBytes reads the complete file and closes it; a single
            // InputStream.read(byte[]) call is not guaranteed to fill the buffer.
            byte[] content = java.nio.file.Files.readAllBytes(new File(imgFile).toPath());
            // Base64 output is pure ASCII, so decode it with an explicit charset.
            imgBase64 = new String(encodeBase64(content), java.nio.charset.StandardCharsets.US_ASCII);
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        // Assemble the JSON request body.
        String bodys;
        try {
            bodys = buildRequestBody(oldFormat, imgBase64, configStr);
        } catch (JSONException e) {
            // Do not send a half-built request.
            e.printStackTrace();
            return;
        }

        try {
            HttpResponse response = HttpUtils.doPost(host, path, method, headers, querys, bodys);
            int stat = response.getStatusLine().getStatusCode();
            if (stat != 200) {
                System.out.println("Http code: " + stat);
                System.out.println("http header error msg: " + response.getFirstHeader("X-Ca-Error-Message"));
                System.out.println("Http body error msg:" + EntityUtils.toString(response.getEntity()));
                return;
            }

            String res = EntityUtils.toString(response.getEntity());
            JSONObject resObj = JSON.parseObject(res);
            if (oldFormat && resObj != null) {
                // Old format wraps the real result as a JSON string inside outputs[0].outputValue.dataValue.
                JSONArray outputArray = resObj.getJSONArray("outputs");
                String output = outputArray.getJSONObject(0).getJSONObject("outputValue").getString("dataValue");
                resObj = JSON.parseObject(output);
            }

            // Null-safe success check: a missing or null "success" counts as failure.
            if (resObj == null || !Boolean.TRUE.equals(resObj.getBoolean("success"))) {
                System.out.println("OCR request did not succeed: " + res);
                return;
            }

            String tables = resObj.getString("tables");
            if (tables == null) {
                System.out.println("OCR response has no \"tables\" field: " + res);
                return;
            }

            // Write the result to an Excel file.
            File saved = writeExcel(saveExcel, tables);
            System.out.println(saved.getName());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the request body in either the old ("inputs" array) or the new (flat) format.
     * The "configure" entry is only added when {@code configStr} is non-empty.
     */
    private static String buildRequestBody(boolean oldFormat, String imgBase64, String configStr) {
        JSONObject requestObj = new JSONObject();
        if (oldFormat) {
            JSONObject input = new JSONObject();
            input.put("image", getParam(50, imgBase64));
            if (configStr.length() > 0) {
                input.put("configure", getParam(50, configStr));
            }
            JSONArray inputArray = new JSONArray();
            inputArray.add(input);
            requestObj.put("inputs", inputArray);
        } else {
            requestObj.put("image", imgBase64);
            if (configStr.length() > 0) {
                requestObj.put("configure", configStr);
            }
        }
        return requestObj.toString();
    }

    /**
     * Decodes the Base64 spreadsheet payload and writes it to
     * {@code saveDir + currentTimeMillis + ".xlsx"}, creating the parent directory if needed.
     *
     * @return the file that was written
     * @throws IOException              if the file cannot be written
     * @throws IllegalArgumentException if the payload is not valid Base64
     */
    private static File writeExcel(String saveDir, String tablesBase64) throws IOException {
        // Decode BEFORE creating the file so a bad payload does not leave an empty .xlsx behind.
        // Literal backslash-n sequences are stripped as before; the MIME decoder additionally
        // skips real line breaks. java.util.Base64 is used instead of the JDK-internal
        // sun.plugin2 helper, which is not part of the public Java API.
        byte[] excel = java.util.Base64.getMimeDecoder().decode(tablesBase64.replace("\\n", ""));

        File file = new File(saveDir + System.currentTimeMillis() + ".xlsx");
        File parent = file.getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }
        // try-with-resources closes the stream even if write() throws.
        try (FileOutputStream out = new FileOutputStream(file)) {
            out.write(excel);
        }
        return file;
    }
}