C++：opencv将文字图片按行分割，可以用来提高OCR项目的识别率

KeNanKN

已于 2023-06-01 17:44:15 修改

阅读量545

点赞数 1

分类专栏： OCR opencv 文章标签： ocr c++ opencv 图像处理 计算机视觉

于 2023-05-31 21:12:30 首次发布

本文链接：https://blog.csdn.net/KeNanKN/article/details/130976960

版权

OCR 同时被 2 个专栏收录

1 篇文章 0 订阅

订阅专栏

opencv

1 篇文章 0 订阅

订阅专栏

先上效果图

代码

#include<opencv2/opencv.hpp>
#include<iostream>

//using namespace cv;
using namespace std;

 // Estimates the x coordinate of the top-left corner of a rotated rectangle.
 //
 // p1.angle is converted from degrees to radians. The half-width and
 // half-height are projected onto the x axis; the sign of the height term
 // depends on whether the angle lies within [-10, 10] degrees.
 float count_x(cv::RotatedRect& p1) {
     const double theta = p1.angle * CV_PI / 180;
     const double width_term = p1.size.width / 2 * std::cos(theta);
     const double height_term = p1.size.height / 2 * std::sin(theta);

     const bool near_horizontal = p1.angle >= -10 && p1.angle <= 10;
     if (near_horizontal) {
         return p1.center.x - width_term + height_term;
     }
     return p1.center.x - width_term - height_term;
 }

 计算旋转矩形左上角的y
 float count_y(cv::RotatedRect& p1) {
     if (p1.angle >= -10 && p1.angle <= 10)return p1.center.y - p1.size.width / 2 * std::sin(p1.angle * CV_PI / 180) - p1.size.height / 2 * std::cos(p1.angle * CV_PI / 180);
     return p1.center.y - p1.size.width / 2 * std::sin(p1.angle * CV_PI / 180) + p1.size.height / 2 * std::cos(p1.angle * CV_PI / 180);
 }


// Builds a binary mask in which the characters of a text line are fused into
// blobs, by running: x-gradient Sobel -> Otsu threshold -> dilate -> erode ->
// dilate. Every step reads from and writes back into `gray`, which is then
// returned.
// NOTE(review): `gray` is passed by value, but cv::Mat copies normally share
// pixel data, so the caller's image is presumably overwritten too - confirm.
cv::Mat preprocess(cv::Mat gray)
{
    // Step 1: first derivative along x (dx = 1, dy = 0) with a 3x3 aperture,
    // stored as 8-bit.
    cv::Sobel(gray, gray, CV_8U, 1, 0, 3);

    // Step 2: binarise to {0, 255} using Otsu's method.
    cv::threshold(gray, gray, 0, 255, cv::THRESH_OTSU + cv::THRESH_BINARY);

    // Step 3: structuring elements for the morphology passes.
    // The height of the dilation kernel controls how far neighbouring rows
    // bleed into each other: e.g. 3 separates rows better than 4, but can
    // also cause missed detections.
    const cv::Mat dilateKernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(24, 4));
    const cv::Mat erodeKernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(24, 9));

    // Step 4: dilate once so the outlines stand out.
    cv::dilate(gray, gray, dilateKernel);

    // Step 5: erode once to drop fine detail such as table lines (the
    // vertical ones are what gets removed here).
    cv::erode(gray, gray, erodeKernel);

    // Step 6: dilate again to make the remaining outlines more pronounced.
    cv::dilate(gray, gray, dilateKernel);

    return gray;
}


//存储文字轮廓坐标
vector<cv::RotatedRect> findTextRegion(cv::Mat img)
{
    vector<cv::RotatedRect> rects_coord;
    //1.查找轮廓
    vector<vector<cv::Point>> contours;//存储图像中的轮廓。
    vector<cv::Vec4i> hierarchy;//存储轮廓的层级信息。
    findContours(img, contours, hierarchy, cv::RETR_CCOMP, cv::CHAIN_APPROX_SIMPLE, cv::Point(0, 0));



    //2.筛选那些面积小的
    for (int i = 0; i < contours.size(); i++)
    {
        //计算当前轮廓的面积
        double area = cv::contourArea(contours[i]);

        //轮廓近似，作用较小，approxPolyDP函数有待研究
        double epsilon = 0.001 * arcLength(contours[i], true);
        cv::Mat approx;
        approxPolyDP(contours[i], approx, epsilon, true);

        //找到最小矩形，该矩形可能有方向
        cv::RotatedRect rect_min = minAreaRect(contours[i]);

        //计算高和宽
        int m_width = rect_min.boundingRect().width;
        int m_height = rect_min.boundingRect().height;

        
        //筛选那些太细的矩形，留下扁的
        if (m_height > m_width*1.2)
            continue;
        if (m_width <= 30 && m_height > m_width * 0.82)//判断例如{} 去掉竖线
            continue;
        if (m_height < 10)  //去掉未截完的文字
            continue;
        rects_coord.push_back(rect_min);

    }
    return rects_coord;
}

// Sort predicate for text rectangles: bottom-to-top, then left-to-right.
//
// Two rectangles whose estimated top-left y values differ by less than 5 are
// treated as being on the same row and ordered by ascending x; otherwise the
// one with the larger y comes first.
// NOTE(review): the 5-pixel tolerance is not transitive, so this may not be a
// strict weak ordering as std::sort expects - confirm on real data.
bool cmp(cv::RotatedRect& p1, cv::RotatedRect& p2) {
    const float top1 = count_y(p1);
    const float top2 = count_y(p2);

    const bool same_row = std::fabs(top1 - top2) < 5;
    if (same_row) {
        return count_x(p1) < count_x(p2);
    }
    return top1 > top2;
}

// Splits `img` into one cropped image per detected text row and shows each
// crop in its own window.
//
// img:   image to analyse; it is deep-copied before preprocessing, and the
//        crops are taken from `img` itself.
// count: output. Set to the index of the last row (number of rows - 1), or to
//        -1 when no text region is found.
// Returns a heap array of count + 1 cv::Mat that the caller must release with
// delete[]; returns nullptr when no text region is found. A row whose crop
// rectangle lies completely outside the image yields an empty cv::Mat.
cv::Mat* detect(cv::Mat img, int &count)
{
    cv::Mat gray;
    img.copyTo(gray);
    count = 0;

    // Morphological preprocessing, then candidate rectangle extraction.
    cv::Mat dilation = preprocess(gray);
    std::vector<cv::RotatedRect> rects = findTextRegion(dilation);

    // Nothing detected: there is no first rectangle to seed the merge with.
    if (rects.empty()) {
        count = -1;
        return nullptr;
    }

    std::sort(rects.begin(), rects.end(), cmp);

    // Returns (width, height) of a rectangle, swapped if needed so that the
    // width is the longer side whenever either side exceeds 10.
    auto oriented_size = [](const cv::RotatedRect& r) {
        float w = r.size.width;
        float h = r.size.height;
        if ((w > 10 || h > 10) && w < h) std::swap(w, h);
        return std::pair<float, float>(w, h);
    };

    std::vector<cv::Point2f> textrect;            // top-left corner of each row
    std::vector<std::pair<float, float>> lenth;   // (width, height) of each row

    // The first rectangle opens the first row.
    textrect.push_back(cv::Point2f(count_x(rects[0]), count_y(rects[0])));
    lenth.push_back(oriented_size(rects[0]));

    // Walk neighbouring pairs: extend the current row while the next
    // rectangle is within 5 pixels vertically, otherwise start a new row.
    for (size_t i = 0; i + 1 < rects.size(); ++i)
    {
        const std::pair<float, float> next_size = oriented_size(rects[i + 1]);
        const float y1 = count_y(rects[i]);
        const float y2 = count_y(rects[i + 1]);
        const float x2 = count_x(rects[i + 1]);

        if (std::fabs(y2 - y1) < 5) {
            lenth[count].first = std::fabs(x2 - textrect[count].x) + next_size.first;
            // Row height is the tallest rectangle merged so far.
            lenth[count].second = std::max(lenth[count].second, next_size.second);
        }
        else {
            count++;
            textrect.push_back(cv::Point2f(x2, y2));
            lenth.push_back(next_size);
        }
    }

    // Crop every merged row out of the input image and display it.
    const cv::Rect image_bounds(0, 0, img.cols, img.rows);
    cv::Mat* text_part = new cv::Mat[count + 1];
    for (int i = count; i >= 0; i--) {
        // Note: the top-left y is smaller than the bottom-left y.
        cv::Rect rect(static_cast<int>(textrect[i].x - 5),
                      static_cast<int>(textrect[i].y - 5),
                      static_cast<int>(lenth[i].first),
                      static_cast<int>(lenth[i].second + 9));
        // Clamp to the image so an out-of-range ROI cannot throw.
        rect &= image_bounds;
        if (rect.width <= 0 || rect.height <= 0)
            continue;   // leaves text_part[i] as an empty Mat

        text_part[i] = img(rect).clone();
        cv::imshow("切割图片 " + std::to_string(i), text_part[i]);
    }

    return text_part;
}


int main(int argc, char* argv[])
{
    int count = 0;
    cv::Mat image_dispose = cv::imread("aaaccc.png");
    if (image_dispose.empty()) {
        std::cout << "无法加载图像文件" << std::endl;
        return 0;
    }

    cvtColor(image_dispose, image_dispose, cv::COLOR_BGR2GRAY); 
    // 创建一个map用于存储每个像素值的频次
    std::map<uchar, int> colorFreq;

    // 遍历图像的每个像素
    for (int i = 0; i < image_dispose.rows; i++) {
        for (int j = 0; j < image_dispose.cols; j++) {
            // 获取像素值
            uchar pixel = image_dispose.at<uchar>(i, j);
            
            // 像素值频次加一
            colorFreq[pixel]++;
        }
    }

    // 找到频次最高的像素值和频次
    int mostFrequentColor = 0;
    int maxFrequency = 0;

    std::map<uchar, int>::iterator it;
    for (it = colorFreq.begin(); it != colorFreq.end(); ++it) {
         std::pair<int, int> pair = *it;
         int frequency = pair.second;
        if (frequency > maxFrequency) {
            maxFrequency = frequency;
            mostFrequentColor = pair.first;
        }
    }
    //调试，得到像素值最多的颜色

        copyMakeBorder(image_dispose, image_dispose, 15, 15, 15, 15, cv::BORDER_CONSTANT, mostFrequentColor <= 120 ? cv::Scalar(255 - mostFrequentColor) : cv::Scalar(mostFrequentColor));//添加白色像素为10的边框

    cv::Mat* img;
    img=detect(image_dispose,count);

    imshow("原图", image_dispose);
    cv::waitKey(0);
    delete[]img;
    return 0;
}

另外，我自己写了一个 OCR 软件，如果需要可以看这篇文章：《C++：分享一款自己写的可以对图片文字识别的单机软件》- 知乎 (zhihu.com)

本站下载链接：https://download.csdn.net/download/KeNanKN/87848965

参考资料：(8条消息) OpenCV2.4.13 文本分割（水平垂直，直方图投影）_songyalong123的博客-CSDN博客