使用腾讯表格文字识别（TableOCR）

最新推荐文章于 2024-08-16 09:18:22 发布

凡沙

最新推荐文章于 2024-08-16 09:18:22 发布

阅读量5.4k

点赞数 1

分类专栏： Idea Oracle 文章标签：腾讯图片文字识别

本文链接：https://blog.csdn.net/qq_43580052/article/details/96334632

版权

Oracle 同时被 2 个专栏收录

4 篇文章 0 订阅

订阅专栏

Idea

3 篇文章 0 订阅

订阅专栏

之前按照公司要求，需要根据表格图片识别出订单内容，并将订单内容保存到数据库。

一、网址搜索腾讯云平台

1.找到产品中的文字识别
2.操作方法按照腾讯提供的文字说明来
https://cloud.tencent.com/document/product/866/34936
3.点击控制台申请密钥
在这里插入图片描述
二、代码实现部分，这里我抽成了一个工具类

1.调用腾讯接口util

package com.fansha.tencentcloudtest.domain.util;

import com.fansha.tencentcloudtest.domain.tencentcloudapi.v20181119.OcrClient;
import com.fansha.tencentcloudtest.domain.tencentcloudapi.v20181119.models.TableOCRRequest;
import com.fansha.tencentcloudtest.domain.tencentcloudapi.v20181119.models.TableOCRResponse;
import com.tencentcloudapi.common.Credential;
import com.tencentcloudapi.common.exception.TencentCloudSDKException;
import com.tencentcloudapi.common.profile.ClientProfile;
import com.tencentcloudapi.common.profile.HttpProfile;
import sun.misc.BASE64Decoder;

import java.io.*;
import java.util.List;

/**
 * @Author: FanSha
 * @Date: 2019/7/1 17:02
 * @Version 1.0
 * @Description: Helper that sends a Base64-encoded table image to the Tencent Cloud
 *               TableOCR API and returns the decoded response payload.
 * @Copyright: Copyright (c) 2019
 * @Company: www.123start.cn
 */
public class TencentRequestUtil {

    /**
     * Calls the TableOCR API with the given image and Base64-decodes the data it returns.
     *
     * <p>Failures are reported on stderr and signalled by an empty array: this covers an
     * SDK exception, a response without data, and a payload that cannot be decoded.
     * Previously an SDK failure fell through to a {@code NullPointerException} on the
     * missing response.
     *
     * <p>NOTE(review): {@code sun.misc.BASE64Decoder} is a JDK-internal class; consider
     * {@code java.util.Base64} if the project targets a JDK where it is unavailable.
     *
     * @param imgBase64 the image content, Base64-encoded
     * @return the decoded response bytes ({@link #main} reads them as an .xlsx workbook),
     *         or an empty array when the call or the decoding failed
     */
    public static byte[] invokeTransaction(String imgBase64) {
        // Create the credential object from the Tencent Cloud account secretId / secretKey.
        Credential cred = new Credential(Constant.TX_SECRET_ID, Constant.TX_SECRET_KEY);

        // Set the endpoint. The SDK normally chooses the domain automatically; it only has
        // to be set by hand for financial-zone services, e.g. the Shanghai financial zone
        // domain tbaas.ap-shanghai-fsi.tencentcloudapi.com.
        HttpProfile httpProfile = new HttpProfile();
        httpProfile.setEndpoint(Constant.TX_ENDPOINT);

        // Create the OCR client. The second argument is the region the resource belongs to,
        // e.g. the string "ap-guangzhou" or one of the predefined constants.
        ClientProfile clientProfile = new ClientProfile();
        clientProfile.setHttpProfile(httpProfile);
        OcrClient client = new OcrClient(cred, Constant.TX_REGION, clientProfile);

        TableOCRRequest tableOCRRequest = new TableOCRRequest();
        tableOCRRequest.setImageBase64(imgBase64);

        TableOCRResponse tableOCRResponse;
        try {
            tableOCRResponse = client.TableOCR(tableOCRRequest);
        } catch (TencentCloudSDKException e) {
            e.printStackTrace();
            // Stop here: there is no response to read, so dereferencing it would only
            // replace the real error with a NullPointerException.
            return new byte[0];
        }

        String data = tableOCRResponse == null ? null : tableOCRResponse.getData();
        if (data == null) {
            System.err.println("TableOCR response contained no data");
            return new byte[0];
        }

        // Strip CRLF line breaks from the Base64 text before decoding.
        String imageBase = data.replaceAll("\r\n", "");
        try {
            return new BASE64Decoder().decodeBuffer(imageBase);
        } catch (IOException e) {
            e.printStackTrace();
            return new byte[0];
        }
    }

    /**
     * Manual smoke test: encodes a local image, runs it through TableOCR, parses the
     * result as an .xlsx workbook and prints every cell value on its own line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String imageBase64 = BASE64Util.encodeImgageToBase64(new File("c:\\Users\\fansha\\Desktop\\图片\\订单识别\\1563238347(1).png"));
        try {
            byte[] bytes = invokeTransaction(imageBase64);
            if (bytes.length == 0) {
                // invokeTransaction already reported the cause; nothing to parse.
                System.err.println("TableOCR returned no data, nothing to parse");
                return;
            }
            InputStream in = new ByteArrayInputStream(bytes);
            List<List<String>> list = ImportExcelUtil.readExcel(in, false);
            if (list == null) {
                // readExcel returns null when the workbook could not be read.
                System.err.println("The TableOCR result could not be read as an Excel workbook");
                return;
            }
            for (List<String> row : list) {
                for (String cellText : row) {
                    System.out.println(cellText);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

2.读取返回excel内容util

package com.fansha.tencentcloudtest.domain.util;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

/**
 * @Author: FanSha
 * @Date: 2019/7/17 14:27
 * @Version 1.0
 * @Description:
 * @Copyright: Copyright (c) 2019
 * @Company: www.123start.cn
 */
public class ImportExcelUtil {

    /**
     * 总行数
     */

    private static int totalRows = 0;

    /**
     * 总列数
     */

    private static int totalCells = 0;

    /**
     * 错误信息
     */

    private static String errorInfo;

    /**
     * 构造方法
     */

    public ImportExcelUtil() {
    }

    /**
     * @描述：得到总行数
     * @参数：@return
     * @返回值：int
     */
    public int getTotalRows() {
        return totalRows;
    }

    /**
     * @描述：得到总列数
     * @参数：@return
     * @返回值：int
     */
    public int getTotalCells() {
        return totalCells;
    }

    /**
     * @描述：得到错误信息
     * @参数：@return
     * @返回值：String
     */
    public String getErrorInfo() {
        return errorInfo;
    }

    /**
     * @描述：验证excel文件
     * @参数：@param filePath　文件完整路径
     * @参数：@return
     * @返回值：boolean
     */
    public static boolean validateExcel(String filePath) {
        /** 检查文件名是否为空或者是否是Excel格式的文件 */
        if (filePath == null || !(WDWUtil.isExcel2003(filePath) || WDWUtil.isExcel2007(filePath))) {
            errorInfo = "文件名不是excel格式";
            return false;
        }

        /** 检查文件是否存在 */
        File file = new File(filePath);
        if (file == null || !file.exists()) {
            errorInfo = "文件不存在";
            return false;
        }
        return true;
    }

    /**
     * @描述：判断excel文件是2007还是2003
     * @参数：@param filePath 文件完整路径
     * @参数：@return
     * @返回值：List
     */
    public static List<List<String>> booleanExcelType(String filePath) {
        List<List<String>> dataLst = new ArrayList<List<String>>();
        InputStream is = null;
        try {
            /** 验证文件是否合法 */
            if (!validateExcel(filePath)) {
                System.out.println(errorInfo);
                return null;
            }
            /** 判断文件的类型，是2003还是2007 */
            boolean isExcel2003 = true;
            if (WDWUtil.isExcel2007(filePath)) {
                isExcel2003 = false;
            }
            /** 调用本类提供的根据流读取的方法 */
            File file = new File(filePath);
            is = new FileInputStream(file);
            dataLst = readExcel(is, isExcel2003);
            is.close();
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    is = null;
                    e.printStackTrace();
                }
            }
        }
        /** 返回最后读取的结果 */
        return dataLst;
    }

    /**
     * @描述：根据流读取Excel文件
     * @参数：@param inputStream
     * @参数：@param isExcel2003
     * @参数：@return
     * @返回值：List
     */
    public static List<List<String>> readExcel(InputStream inputStream, boolean isExcel2003) {
        List<List<String>> dataLst = null;
        try {
            /** 根据版本选择创建Workbook的方式 */
            Workbook wb = null;
            if (isExcel2003) {
                wb = new HSSFWorkbook(inputStream);
            } else {
                wb = new XSSFWorkbook(inputStream);
            }
            dataLst = readData(wb);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return dataLst;
    }

    /**
     * @描述：读取数据
     * @参数：@param Workbook
     * @参数：@return
     * @返回值：List<List<String>>
     */
    private static List<List<String>> readData(Workbook wb) {
        List<List<String>> dataLst = new ArrayList<List<String>>();
        /** 得到第一个shell */
        Sheet sheet = wb.getSheetAt(0);
        /** 得到Excel的行数 */
        totalRows = sheet.getPhysicalNumberOfRows();
        /** 得到Excel的列数 */
        if (totalRows >= 1 && sheet.getRow(0) != null) {
            totalCells = sheet.getRow(0).getPhysicalNumberOfCells();
        }

        /** 循环Excel的行 */
        for (int r = 0; r < totalRows; r++) {
            Row row = sheet.getRow(r);
            if (row == null) {
                continue;
            }
            List<String> rowLst = new ArrayList<String>();
            /** 循环Excel的列 */
            for (int c = 0; c < new ImportExcelUtil().getTotalCells(); c++) {
                Cell cell = row.getCell(c);
                String cellValue = "";
                if (null != cell) {
                    // 以下是判断数据的类型
                    switch (cell.getCellType()) {
                        case HSSFCell.CELL_TYPE_NUMERIC: // 数字
                            cellValue = cell.getNumericCellValue() + "";
                            break;
                        case HSSFCell.CELL_TYPE_STRING: // 字符串
                            cellValue = cell.getStringCellValue();
                            break;
                        case HSSFCell.CELL_TYPE_BOOLEAN: // Boolean
                            cellValue = cell.getBooleanCellValue() + "";
                            break;
                        case HSSFCell.CELL_TYPE_FORMULA: // 公式
                            cellValue = cell.getCellFormula() + "";
                            break;
                        case HSSFCell.CELL_TYPE_BLANK: // 空值
                            cellValue = "";
                            break;
                        case HSSFCell.CELL_TYPE_ERROR: // 故障
                            cellValue = "非法字符";
                            break;
                        default:
                            cellValue = "未知类型";
                            break;
                    }
                }
                rowLst.add(cellValue);
            }
            /** 保存第r行的第c列 */
            dataLst.add(rowLst);
        }
        return dataLst;
    }
}

/**
 * Helper that recognises Excel files by the extension of their file name.
 */
class WDWUtil {
    /**
     * Tells whether the path ends in the Excel 2003 extension.
     *
     * @param filePath full path of the file
     * @return {@code true} if the name ends in ".xls" (any letter case) and has at least
     *         one character before the dot
     */
    public static boolean isExcel2003(String filePath) {
        return endsWithExtension(filePath, "xls");
    }

    /**
     * Tells whether the path ends in the Excel 2007 extension.
     *
     * @param filePath full path of the file
     * @return {@code true} if the name ends in ".xlsx" (any letter case) and has at least
     *         one character before the dot
     */
    public static boolean isExcel2007(String filePath) {
        return endsWithExtension(filePath, "xlsx");
    }

    /**
     * Matches the whole path against "anything, a dot, then the extension", where only
     * the extension is compared case-insensitively.
     */
    private static boolean endsWithExtension(String filePath, String extension) {
        String regex = "^.+\\.(?i)(" + extension + ")$";
        return java.util.regex.Pattern.matches(regex, filePath);
    }
}

三、总结

1.腾讯表格文字识别的接入比较简单，但是识别效果比较差，不建议将识别结果直接写入数据库！
参考：https://www.cnblogs.com/f-young/p/11024505.html

凡沙

关注

1
点赞
踩
5

收藏

觉得还不错? 一键收藏
0
评论
使用腾讯表格文字识别（TableOCR）

之前按照公司要求，根据表格图片读取订单内容，将订单内容进行保存数据库一、网址搜索腾讯云平台1.找到产品中的文字识别2.操作方法按照腾讯提供的文字说明来https://cloud.tencent.com/document/product/866/349363.点击控制台申请秘钥二、代码实现部分，这里我抽成了一个工具类1.调用腾讯接口utilpackage com.fansha.te...
复制链接

扫一扫

专栏目录