腾讯云OCR识别

qq_58399194

已于 2023-03-08 09:23:55 修改

阅读量5.8k

点赞数 17

文章标签：视觉检测人工智能腾讯云

于 2023-03-07 20:47:02 首次发布

本文链接：https://blog.csdn.net/qq_58399194/article/details/129352235

版权

前言

提示：这里可以添加本文要记录的大概内容：

为了帮助同学们进行OCR识别，本文介绍如何调用腾讯云的API进行OCR识别，并针对六种常用语言（Python / Java / C++ / .NET / Node.js / Go）分别给出调用示例。下面从腾讯云身份验证和腾讯云API接口调用两部分介绍如何调用腾讯云OCR API。

一、腾讯云身份验证

通过链接 https://console.cloud.tencent.com/ocr/v2/overview 登录腾讯云官网，完成实名认证并开通OCR服务。

由于开通OCR服务后密钥的位置比较隐蔽，可以直接通过链接 https://console.cloud.tencent.com/cam/capi 进入个人密钥页面，记录SecretID和SecretKey。

二、使用API

2.1 Python OCR API调用

个人环境配置如下：

pip install -i https://mirrors.tencent.com/pypi/simple/ --upgrade tencentcloud-sdk-python

pip install jsonpath

环境配置好之后，建立python文件，并在文件中使用如下代码(本地照片)：

# -*- coding: utf-8 -*-
from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.ocr.v20181119 import ocr_client, models
import base64
import json
import jsonpath
import json
try:
    # Keys can be obtained from the console: https://console.cloud.tencent.com/cam/capi
    # Replace the two placeholders with your own SecretId / SecretKey.
    cred = credential.Credential("SecretID", "SecretKey")

    # Optional HTTP profile; only the API endpoint is set here.
    httpProfile = HttpProfile()
    httpProfile.endpoint = "ocr.tencentcloudapi.com"

    # Optional client profile that carries the HTTP profile above.
    clientProfile = ClientProfile()
    clientProfile.httpProfile = httpProfile

    # Client object for the OCR product; clientProfile is optional.
    client = ocr_client.OcrClient(cred, "ap-shanghai", clientProfile)

    # Every API has a matching request object.
    req = models.RecognizeTableAccurateOCRRequest()

    # Path of the local image to recognize (replace the placeholder).
    image_path = 'ImagePath'

    # Read the image as raw bytes and attach it to the request as Base64 text.
    with open(image_path, 'rb') as f:
        req.ImageBase64 = base64.b64encode(f.read()).decode('utf-8')

    # resp is a RecognizeTableAccurateOCRResponse matching the request object.
    resp = client.RecognizeTableAccurateOCR(req)
    # Print the response as a JSON string.
    print(resp.to_json_string())

except TencentCloudSDKException as err:
    print(err)

以上代码中只需要换三个位置：（1）SecretID换成个人密钥ID。（2）SecretKey换成个人密钥Key。（3）更换图片地址：将ImagePath换成本地图片的路径。

若是URL图片则使用如下代码：

# -*- coding: utf-8 -*-
from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.ocr.v20181119 import ocr_client, models
import base64
import json
import jsonpath
import json
try:
    # Build a credential from the Tencent Cloud account's SecretId and SecretKey.
    # Keep the key pair confidential: leaked code can leak the keys and endanger
    # every resource under the account. This sample is for reference only; for
    # safer ways to handle keys see https://cloud.tencent.com/document/product/1278/85305
    # Keys can be obtained from the console: https://console.cloud.tencent.com/cam/capi
    cred = credential.Credential("SecretID", "SecretKey")

    # Optional HTTP profile; only the API endpoint is set here.
    httpProfile = HttpProfile()
    httpProfile.endpoint = "ocr.tencentcloudapi.com"

    # Optional client profile that carries the HTTP profile above.
    clientProfile = ClientProfile()
    clientProfile.httpProfile = httpProfile

    # Client object for the OCR product; clientProfile is optional.
    client = ocr_client.OcrClient(cred, "ap-shanghai", clientProfile)

    # Every API has a matching request object.
    req = models.RecognizeTableAccurateOCRRequest()

    # The image is passed by URL, so no local file is read in this variant.
    # Replace "ImageURL" with the URL of the image to recognize.
    params = {
        "ImageUrl": "ImageURL"
    }
    req.from_json_string(json.dumps(params))

    # resp is a RecognizeTableAccurateOCRResponse matching the request object.
    resp = client.RecognizeTableAccurateOCR(req)
    # Print the response as a JSON string.
    print(resp.to_json_string())

except TencentCloudSDKException as err:
    print(err)

2.2 JAVA OCR API调用

同样，调用Java API的前提条件是已经配置好Java IDE的环境，并配置好maven文件。之后就可以按照下面的步骤调用OCR。

首先在pom文件中添加依赖：

<dependency>
      <groupId>com.tencentcloudapi</groupId>
      <artifactId>tencentcloud-sdk-java-ocr</artifactId>
      <version>3.1.701</version>
</dependency>

接着，再建立一个新的Java类，文件名为：RecognizeTableAccurateOCR.java

import com.tencentcloudapi.common.Credential;
import com.tencentcloudapi.common.profile.ClientProfile;
import com.tencentcloudapi.common.profile.HttpProfile;
import java.util.Base64;
import com.tencentcloudapi.common.exception.TencentCloudSDKException;
import com.tencentcloudapi.ocr.v20181119.OcrClient;
import com.tencentcloudapi.ocr.v20181119.models.*;
import java.io.*;
/**
 * Console sample: sends a local image (Base64-encoded) to the Tencent Cloud
 * RecognizeTableAccurateOCR API and prints the JSON response.
 */
public class RecognizeTableAccurateOCR
{
    /**
     * Reads the whole file at {@code imagePath} and returns it Base64-encoded.
     *
     * @param imagePath path of a local png/jpg file
     * @return the Base64 text of the file contents
     * @throws UncheckedIOException if the file cannot be read
     */
    private static String readImageAsBase64(String imagePath) {
        try {
            // Files.readAllBytes reads the complete file and closes it, including
            // for empty files; fully qualified so no extra import is required.
            byte[] bytes = java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(imagePath));
            return Base64.getEncoder().encodeToString(bytes);
        } catch (IOException e) {
            // Unchecked so that main() still compiles when the Base64 line is
            // commented out in favour of setImageUrl.
            throw new UncheckedIOException("Cannot read image file: " + imagePath, e);
        }
    }

    public static void main(String [] args) {
        try{
            // Build a credential from the Tencent Cloud account's SecretId and SecretKey.
            // Keep the key pair confidential: leaked code can leak the keys and endanger
            // every resource under the account. This sample is for reference only; for
            // safer ways to handle keys see https://cloud.tencent.com/document/product/1278/85305
            // Keys can be obtained from the console: https://console.cloud.tencent.com/cam/capi
            Credential cred = new Credential("SecretId ", "SecretKey");
            // Optional HTTP profile; only the API endpoint is set here.
            HttpProfile httpProfile = new HttpProfile();
            httpProfile.setEndpoint("ocr.tencentcloudapi.com");
            // Absolute path of the local png/jpg file (replace the placeholder).
            String imagePath = "本地png/jpg文件的绝对地址";

            // Optional client profile that carries the HTTP profile above.
            ClientProfile clientProfile = new ClientProfile();
            clientProfile.setHttpProfile(httpProfile);
            // Client object for the OCR product; clientProfile is optional.
            OcrClient client = new OcrClient(cred, "ap-shanghai", clientProfile);
            // Every API has a matching request object.
            RecognizeTableAccurateOCRRequest req = new RecognizeTableAccurateOCRRequest();
            // To recognize an image by URL instead, comment out the setImageBase64
            // line below and uncomment the next line.
            // req.setImageUrl("Image_URL");
            req.setImageBase64(readImageAsBase64(imagePath));
            // resp is a RecognizeTableAccurateOCRResponse matching the request object.
            RecognizeTableAccurateOCRResponse resp = client.RecognizeTableAccurateOCR(req);
            // Print the response as a JSON string.
            System.out.println(RecognizeTableAccurateOCRResponse.toJsonString(resp));
        } catch (TencentCloudSDKException e) {
            System.out.println(e.toString());
        } catch (UncheckedIOException e) {
            // The local image could not be read; report it instead of
            // continuing with missing image data.
            System.out.println(e.toString());
        }
    }
}

（1）（2）按照顺序执行，（3）（4）任选其一执行

（1）SecretID换成个人秘钥ID。

（2）SecretKey换成个人秘钥Key。

（3）更换本地图片：将imagePath的值换成本地图片的绝对路径。

（4）注释掉 req.setImageBase64(...) 这一行的同时，取消 //req.setImageUrl("Image_URL") 的注释，并将Image_URL换成自己的图片Url。

点击运行，即可获得识别结果

2.3 Go OCR API调用

首先配置好Go的环境，然后在终端进入项目路径下，终端执行如下命令(分步按顺序执行，切勿一起执行)：

go get sdk  
go get -v -u github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/ocr
go get -v -u github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common

在Go文件中运行如下代码：

package main

import (
	"encoding/base64"
	"fmt"
	"io/ioutil"
	v20181119 "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/ocr/v20181119"
)

// main reads a local image, Base64-encodes it, sends it to the
// RecognizeTableAccurateOCR API and prints the JSON response.
// Any error aborts the program with a panic.
func main() {
	// Replace the two placeholders with your own SecretId / SecretKey.
	client, err := v20181119.NewClientWithSecretId("SecretID", "SecretKey", "ap-guangzhou")
	if err != nil {
		panic(err)
	}

	// Replace the placeholder with the path of a local jpg/png image.
	imageBytes, err := ioutil.ReadFile(`本地jpg/png图片地址`)
	if err != nil {
		panic(err)
	}
	encoded := base64.StdEncoding.EncodeToString(imageBytes)

	// The request carries the image as Base64 text.
	request := v20181119.NewRecognizeTableAccurateOCRRequest()
	request.ImageBase64 = &encoded

	response, err := client.RecognizeTableAccurateOCR(request)
	if err != nil {
		panic(err)
	}
	fmt.Println(response.ToJsonString())
}

（1）SecretID换成个人秘钥ID。

（2）SecretKey换成个人秘钥Key。

（3）更换本地图片：将代码中的“本地jpg/png图片地址”换成自己的图片路径。

Go语言有些特殊，不允许变量定义后未使用，引入的包也必须被使用，这一点与Python很不一致，所以这里再放一个使用图片URL地址的完整代码，按下面的说明替换占位符后即可运行：

package main

import (
	"fmt"
	// "io/ioutil"

	v20181119 "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/ocr/v20181119"
)

// main sends an image URL to the RecognizeTableAccurateOCR API and prints
// the JSON response. Any error aborts the program with a panic.
func main() {
	// Replace the two placeholders with your own SecretId / SecretKey.
	client, err := v20181119.NewClientWithSecretId("SecretID", "SecretKey", "ap-guangzhou")
	if err != nil {
		panic(err)
	}

	// Replace the placeholder with the URL of the image to recognize.
	targetURL := "ImageURL"
	request := v20181119.NewRecognizeTableAccurateOCRRequest()
	request.ImageUrl = &targetURL

	response, err := client.RecognizeTableAccurateOCR(request)
	if err != nil {
		panic(err)
	}
	fmt.Println(response.ToJsonString())
}

（1）SecretID换成个人秘钥ID。

（2）SecretKey换成个人秘钥Key。

（3）更换图片URL：将ImageURL换成自己的图片URL。

2.4 Nodejs OCR API调用

同样首先需要配置好Nodejs的基本环境，使用这门语言进行开发应该已经都配置好了。通过下面方式在终端安装腾讯云相关依赖：

npm install tencentcloud-sdk-nodejs --save

接着建立index.js文件，并在文件中运行如下代码：

const tencentcloud = require("tencentcloud-sdk-nodejs")
const fs = require("fs");
const util = require("util");

// OCR client class of the 2018-11-19 API version.
const OCRClient = tencentcloud.ocr.v20181119.Client

// Replace the two placeholders with your own SecretId / SecretKey.
const clientConfig = {
    credential: {
        secretId: "Secret ID",
        secretKey: "Secret Key",
    },
    // Product region
    region: "ap-guangzhou",
}
const client = new OCRClient(clientConfig)

// Read the local image directly as a Base64 string (replace 'ImagePath').
const sample_img = fs.readFileSync('ImagePath', {encoding: 'base64'});

// Success handler: print the full response without depth truncation.
const printResult = (data) => {
    console.log(util.inspect(data, {depth: null}))
}

// Failure handler: print the error.
const printError = (err) => {
    console.error(err)
}

client.RecognizeTableAccurateOCR({ ImageBase64: sample_img }).then(printResult, printError)

（1）（2）（3）按照顺序执行

（1）SecretID换成个人秘钥ID。

（2）SecretKey换成个人秘钥Key。

（3）更换本地图片：将ImagePath换成自己的图片路径。

若是Url图片执行如下代码：

const tencentcloud = require("tencentcloud-sdk-nodejs")
const fs = require("fs");
const util = require("util");

// OCR client class of the 2018-11-19 API version.
const OCRClient = tencentcloud.ocr.v20181119.Client

// Replace the two placeholders with your own SecretId / SecretKey.
const client = new OCRClient({
    credential: {
        secretId: "Secret ID",
        secretKey: "Secret Key",
    },
    // Product region
    region: "ap-guangzhou",
})

// The image is passed by URL, so no local file is read in this variant.
client.RecognizeTableAccurateOCR(
    {
        // Replace with the URL of the image to recognize.
        "ImageUrl": "YOUR_ImageUrlPath"
    },
).then(
    (data) => {
        // Print the full response without depth truncation.
        console.log(util.inspect(data, {depth: null}))
    },
    (err) => {
        console.error(err)
    }
)

（1）（2）（3）按照顺序执行

（1）SecretID换成个人秘钥ID。

（2）SecretKey换成个人秘钥Key。

(3) 更换Url图片 :YOUR_ImageUrlPath为自己的图片。

2.5 .Net API调用

首先配置好C#的环境，初始化项目，在运行终端输入以下指令：


dotnet new console    # initialize a new .NET console project

dotnet add package TencentCloudSDK.Ocr    # install the Tencent Cloud OCR SDK package

dotnet run    # once the package is installed, run the program (the terminal must be in the directory that contains the .cs file)

在配置好依赖之后，在.cs文件中运行以下代码：

using System;
using System.Threading.Tasks;
using TencentCloud.Common;
using TencentCloud.Common.Profile;
using TencentCloud.Ocr.V20181119;
using TencentCloud.Ocr.V20181119.Models;

namespace TencentCloudExamples
{
    /// <summary>
    /// Console sample: sends an image URL to the RecognizeTableAccurateOCR API
    /// and prints the JSON response.
    /// </summary>
    class RecognizeTableAccurateOCR
    {
        static void Main(string[] args)
        {
            try
            {
                // Credential built from the account's SecretId and SecretKey.
                // Keep the key pair confidential; this sample is for reference only. For
                // safer ways to handle keys see https://cloud.tencent.com/document/product/1278/85305
                // Keys can be obtained from https://console.cloud.tencent.com/cam/capi
                var cred = new Credential { SecretId = "SecretID", SecretKey = "SecretKey" };

                // Optional client profile and HTTP profile; only the endpoint is set.
                var clientProfile = new ClientProfile();
                var httpProfile = new HttpProfile { Endpoint = "ocr.tencentcloudapi.com" };
                clientProfile.HttpProfile = httpProfile;

                // Client object for the OCR product; clientProfile is optional.
                var client = new OcrClient(cred, "ap-shanghai", clientProfile);

                // Request object for this API; replace "Image_URL" with the image URL.
                var req = new RecognizeTableAccurateOCRRequest { ImageUrl = "Image_URL" };

                // resp is the RecognizeTableAccurateOCRResponse matching the request.
                RecognizeTableAccurateOCRResponse resp = client.RecognizeTableAccurateOCRSync(req);

                // Print the response as a JSON string.
                Console.WriteLine(AbstractModel.ToJsonString(resp));
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
            }
            // Wait for input so the console output stays visible.
            Console.Read();
        }
    }
}

（1）（2）（3）按照顺序执行

（1）SecretID换成个人秘钥ID。

（2）SecretKey换成个人秘钥Key。

（3）更换Url图片：将Image_URL换成自己的图片Url。

当图片为本地文件时，执行如下代码：

using System;
using System.Threading.Tasks;
using TencentCloud.Common;
using TencentCloud.Common.Profile;
using TencentCloud.Ocr.V20181119;
using TencentCloud.Ocr.V20181119.Models;

namespace TencentCloudExamples
{
    /// <summary>
    /// Console sample: sends a local image (Base64-encoded) to the
    /// RecognizeTableAccurateOCR API and prints the JSON response.
    /// </summary>
    class RecognizeTableAccurateOCR
    {
        static void Main(string[] args)
        {
            try
            {
                // Keys can be obtained from the console: https://console.cloud.tencent.com/cam/capi
                Credential cred = new Credential {
                    SecretId = "SecretID",
                    SecretKey = "SecretKey"
                };
                // Optional client profile.
                ClientProfile clientProfile = new ClientProfile();
                // Optional HTTP profile; only the API endpoint is set here.
                HttpProfile httpProfile = new HttpProfile();
                httpProfile.Endpoint = ("ocr.tencentcloudapi.com");
                clientProfile.HttpProfile = httpProfile;

                // Client object for the OCR product; clientProfile is optional.
                OcrClient client = new OcrClient(cred, "ap-shanghai", clientProfile);
                // Every API has a matching request object.
                RecognizeTableAccurateOCRRequest req = new RecognizeTableAccurateOCRRequest();
                // File is fully qualified because this file has no `using System.IO;`,
                // so the sample also compiles without implicit usings.
                // Replace "Image_Path" with the path of the local image.
                byte[] image_Bytes = System.IO.File.ReadAllBytes("Image_Path");
                string Base64 = Convert.ToBase64String(image_Bytes);
                req.ImageBase64 = Base64;
                // resp is a RecognizeTableAccurateOCRResponse matching the request object.
                RecognizeTableAccurateOCRResponse resp = client.RecognizeTableAccurateOCRSync(req);
                // Print the response as a JSON string.
                Console.WriteLine(AbstractModel.ToJsonString(resp));
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
            }
            // Wait for input so the console output stays visible.
            Console.Read();
        }
    }
}

（1）（2）（3）按照顺序执行

（1）SecretID换成个人秘钥ID。

（2）SecretKey换成个人秘钥Key。

（3）更换本地图片：将Image_Path换成自己的图片地址。

2.6 C++ API调用

C++环境这里介绍linux系统环境的配置和API的调用。

首先需要在linux上配置好C++开发环境，例如gdb这些，能够正常初始化、编译、运行一个cpp项目。然后，打开终端，输入以下命令安装cmake：

# Ubuntu
sudo apt-get install cmake

# CentOS
yum install cmake3

安装依赖库：libcurl

# Ubuntu
sudo apt-get install libcurl4-openssl-dev

# CentOS
yum install libcurl-devel

安装依赖库 :openssl

# Ubuntu
sudo apt-get install libssl-dev

# CentOS
yum install openssl-devel

安装依赖库：libuuid

# Ubuntu
sudo apt-get install uuid-dev

# CentOS
yum install libuuid-devel

PS：有些同学（包括我自己）可能会遇到以上库安装失败的问题。通过查询资料，原因很可能是没有启用i386架构，所以需要先在终端运行下面两条命令：

dpkg --add-architecture i386
apt-get update

然后再分别执行以上三条命令安装三个库，就可以成功啦！（若一次性就安装成功，可以忽略这两条命令。）

接下来，我们开始安装SDK,首先切换到一个下载C++库的路径，然后在终端依次（一定要一个一个执行，不要复制粘贴一起执行）执行如下命令：

git clone https://github.com/TencentCloud/tencentcloud-sdk-cpp
cd tencentcloud-sdk-cpp
mkdir sdk_build
cd sdk_build
# On CentOS use `cmake3 ..`; on Ubuntu the command is still `cmake`.
# Build only the listed products; separate multiple products with semicolons (;).
cmake -DBUILD_MODULES="ocr" ..
make
sudo make install

然后在cpp文件中执行下列代码：

#include <tencentcloud/core/Credential.h>
#include <tencentcloud/core/profile/ClientProfile.h>
#include <tencentcloud/core/profile/HttpProfile.h>
#include <tencentcloud/ocr/v20181119/OcrClient.h>
#include <tencentcloud/ocr/v20181119/model/RecognizeTableAccurateOCRRequest.h>
#include <tencentcloud/ocr/v20181119/model/RecognizeTableAccurateOCRResponse.h>
#include <iostream>
#include <string>
#include <vector>

using namespace TencentCloud;
using namespace TencentCloud::Ocr::V20181119;
using namespace TencentCloud::Ocr::V20181119::Model;
using namespace std;

// Sends an image URL to the RecognizeTableAccurateOCR API and prints the JSON
// response. Returns 0 on success, -1 when the request fails.
int main() {
    // Credential built from the account's SecretId and SecretKey. Keep the key
    // pair confidential; this sample is for reference only. For safer ways to
    // handle keys see https://cloud.tencent.com/document/product/1278/85305
    // Keys can be obtained from https://console.cloud.tencent.com/cam/capi
    Credential cred("SecretId", "SecretKey");

    // Optional HTTP profile; only the API endpoint is set here.
    HttpProfile httpProfile{};
    httpProfile.SetEndpoint("ocr.tencentcloudapi.com");

    // Optional client profile that carries the HTTP profile above.
    ClientProfile clientProfile{};
    clientProfile.SetHttpProfile(httpProfile);

    // Client object for the OCR product; clientProfile is optional.
    OcrClient client(cred, "ap-shanghai", clientProfile);

    // Request object for this API. Replace "ImageURL" with the image URL;
    // req.SetImageBase64(...) can be used instead to send Base64 data.
    RecognizeTableAccurateOCRRequest req{};
    req.SetImageUrl("ImageURL");

    // outcome wraps either the response or the error of the call.
    auto outcome = client.RecognizeTableAccurateOCR(req);
    if (outcome.IsSuccess())
    {
        RecognizeTableAccurateOCRResponse resp = outcome.GetResult();
        // Print the response as a JSON string.
        cout << resp.ToJsonString() << endl;
        return 0;
    }

    cout << outcome.GetError().PrintAll() << endl;
    return -1;
}

以上代码中只需要换三个位置：（1）SecretId换成个人密钥ID。（2）SecretKey换成个人密钥Key。（3）更换图片URL链接：将ImageURL换成自己的图片URL，即可正常运行。