EasyOCR 是一个 Python 文字识别库,支持 80 多种语言。直接用 Python 调用非常简单,代码量很少,对用户十分友好。如果需要从 C++ 调用,则要借助 Python/C API(在 C++ 程序中嵌入 CPython 解释器)。这部分不建议直接照搬 ChatGPT 生成的代码,其中有很多坑。
0,GitHub:GitHub - JaidedAI/EasyOCR: Ready-to-use OCR with 80+ supported languages and all popular writing scripts including Latin, Chinese, Arabic, Devanagari, Cyrillic, etc.
1,测试及支持语言列表:Jaided AI: EasyOCR demo
2,安装
如果需要GPU版本的easyocr,需要先安装GPU版本的torch库等(这块比较复杂)。如果只是需要CPU版本的easyocr的话,直接一句指令安装即可:
pip3 install easyocr -i https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn
3,python调用直接识别文件
import easyocr

# Create a reader for English text.
reader = easyocr.Reader(['en'])
# readtext is given the image file path directly.
# (The original line was missing the closing quote of the path literal,
# which made the whole snippet a SyntaxError.)
result = reader.readtext('/home/huahua/lpr_eng.jpg')
print(result)
4,python调用识别mat
import easyocr
import cv2

# Sample image that will be decoded with OpenCV before recognition.
IMAGE_PATH = '/home/huahua/lpr_eng.jpg'

# Create a reader for English text.
ocr = easyocr.Reader(['en'])
# Load the picture with cv2.imread and pass the in-memory image
# (instead of a file path) to readtext, then print what it returns.
image = cv2.imread(IMAGE_PATH)
print(ocr.readtext(image))
5,python调用增加可选参数白名单
import easyocr
import cv2

# Sample image and the characters passed to readtext as its allowlist
# (upper-case letters and digits).
IMAGE_PATH = '/home/huahua/lpr_eng.jpg'
ALLOWED_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890'

# Create a reader for English text.
ocr = easyocr.Reader(['en'])
# Decode the picture with OpenCV, run recognition with the optional
# allowlist argument, and print what readtext returns.
image = cv2.imread(IMAGE_PATH)
print(ocr.readtext(image, allowlist=ALLOWED_CHARS))
6,C++调用直接识别文件
#include <Python.h>
#include <iostream>
//编译:g++ -o callEasyOcr callEasyOcr.cpp -I /usr/include/python3.8 -lpython3.8
int main() {
// 初始化Python解释器
Py_Initialize();
// 导入EasyOCR模块
PyObject* pModule = PyImport_ImportModule("easyocr");
if (pModule != NULL) {
std::cout<<"PyImport_ImportModule easyocr success!"<<std::endl;
// 获取EasyOCR类
PyObject* pClass = PyObject_GetAttrString(pModule, "Reader");
if (pClass != NULL && PyCallable_Check(pClass)) {
std::cout<<"PyObject_GetAttrString Reader success!"<<std::endl;
// 准备参数
PyObject* langs = PyList_New(2);
PyList_SetItem(langs, 0, PyUnicode_FromString("ch_sim"));
PyList_SetItem(langs, 1, PyUnicode_FromString("en"));
PyObject* pArgs = PyTuple_New(1);
PyTuple_SetItem(pArgs, 0, langs);
// 创建EasyOCR对象
PyObject* pInstance = PyObject_CallObject(pClass, pArgs);
if (pInstance != NULL) {
std::cout<<"PyObject_CallObject pClass success!"<<std::endl;
// 调用EasyOCR对象的readtext方法
PyObject* pResult = PyObject_CallMethod(pInstance, "readtext", "(s)", "/home/huahua/lpr_ch_en.jpg");
if (pResult != NULL) {
std::cerr << "Call readtext success" << std::endl;
// 解析结果
int resultSize = PyList_Size(pResult);
for(int i=0;i<resultSize;i++)
{
PyObject *listItem = PyList_GetItem(pResult,i);
Py_ssize_t listItemsize = PyTuple_Size(listItem);
for (Py_ssize_t i = 0; i < listItemsize; ++i) {
PyObject* tupleItem = PyTuple_GetItem(listItem, i);
if (tupleItem != NULL && PyUnicode_Check(tupleItem)) {
const char* text = PyUnicode_AsUTF8(tupleItem);
std::cout << text << std::endl;
}
}
}
Py_DECREF(pResult);
} else {
std::cerr << "Call croppedImage failed" << std::endl;
PyErr_Print();
}
// 释放EasyOCR对象
Py_DECREF(pInstance);
} else {
std::cerr << "Failed to create instance" << std::endl;
PyErr_Print();
}
// 释放EasyOCR类
Py_DECREF(pClass);
} else {
std::cerr << "Class not found" << std::endl;
PyErr_Print();
}
// 释放EasyOCR模块
Py_DECREF(pModule);
} else {
PyErr_Print();
std::cerr << "Module not found" << std::endl;
}
// 清理Python解释器
Py_Finalize();
return 0;
}
7,C++调用识别mat
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <Python.h>
#include <numpy/arrayobject.h>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <unistd.h>
#include <time.h>
//编译:g++ -o easyocr-opencv easyocr-opencv.cpp -I /usr/include/python3.8 -I /home/huahua/.local/lib/python3.8/site-packages/numpy/core/include -lpython3.8 `pkg-config --cflags --libs opencv4`
/**
 * Reads an image file into a 3-channel colour cv::Mat.
 *
 * @param filePath Path of the image file to load. Taken as `const char*` so that
 *                 string literals can be passed without a deprecated conversion.
 * @return The decoded image; an empty Mat when the file could not be read
 *         (an error message is printed to stderr in that case).
 */
cv::Mat readPictureFile(const char* filePath)
{
    // Decode the whole picture as colour data.
    cv::Mat originalImage = cv::imread(filePath, cv::IMREAD_COLOR);
    if (originalImage.empty())
    {
        std::cerr << "Could not open or find the image" << std::endl;
    }
    return originalImage;
}
int main() {
// 初始化Python解释器
Py_Initialize();
import_array();//使用numy数组传入图片数据要加的
// 加载EasyOCR模块
PyObject* easyocr_module = PyImport_ImportModule("easyocr");
if (easyocr_module == NULL) {
std::cerr << "Failed to import easyocr module" << std::endl;
PyErr_Print();
return 1;
}
// 获取EasyOCR的Reader类
PyObject* reader_class = PyObject_GetAttrString(easyocr_module, "Reader");
if (reader_class == NULL || !PyCallable_Check(reader_class)) {
std::cerr << "Failed to get easyocr Reader class" << std::endl;
PyErr_Print();
Py_XDECREF(easyocr_module);
return 1;
}
// 准备参数
PyObject* langs = PyList_New(2);
PyList_SetItem(langs, 0, PyUnicode_FromString("ch_sim"));
PyList_SetItem(langs, 1, PyUnicode_FromString("en"));
PyObject* pArgs = PyTuple_New(1);
PyTuple_SetItem(pArgs, 0, langs);
// 创建Reader对象
PyObject* reader_instance = PyObject_CallObject(reader_class, pArgs);
if (reader_instance == NULL) {
std::cerr << "Failed to create easyocr Reader instance" << std::endl;
PyErr_Print();
Py_XDECREF(easyocr_module);
Py_XDECREF(reader_class);
return 1;
}
// 加载并读取截图数据为OpenCV的Mat对象,假设截图数据已经存储在cv::Mat对象image中
cv::Mat originalImage = readPictureFile("//home/huahua/lpr_croppedImage.jpg");//读文件
cv::Mat croppedImage;cv::cvtColor(originalImage, croppedImage, cv::COLOR_BGR2RGB);
// 将OpenCV的Mat对象转换为numpy数组对象
npy_intp dims[] = {croppedImage.rows, croppedImage.cols, croppedImage.channels()};
PyObject *numpy_array = PyArray_SimpleNewFromData(3, dims, NPY_UINT8, croppedImage.data);//记得加import_array();不然会段错误
// 调用OCR识别
printTime("readtext begin");
PyObject* pResult = PyObject_CallMethod(reader_instance, "readtext", "(O)", numpy_array);
printTime("readtext end");
Py_XDECREF(numpy_array);
if (pResult == NULL) {
std::cerr << "Failed to call easyocr readtext method" << std::endl;
PyErr_Print();
} else {
std::cerr << "Call success" << std::endl;
// 解析结果
int resultSize = PyList_Size(pResult);
for(int i=0;i<resultSize;i++)
{
PyObject *listItem = PyList_GetItem(pResult,i);
Py_ssize_t listItemsize = PyTuple_Size(listItem);
for (Py_ssize_t i = 0; i < listItemsize; ++i) {
PyObject* tupleItem = PyTuple_GetItem(listItem, i);
if (tupleItem != NULL && PyUnicode_Check(tupleItem)) {
const char* text = PyUnicode_AsUTF8(tupleItem);
std::cout << text << std::endl;
}
}
}
}
Py_DECREF(pResult);
// 释放资源
Py_XDECREF(easyocr_module);
Py_XDECREF(reader_class);
Py_XDECREF(reader_instance);
// 清理Python解释器
Py_Finalize();
return 0;
}
特征:加载库时耗时较长,占用的硬件资源也比较多,CPU 基本会跑满。
优势:会自动下载新的语言包(需要网络)