利用投影法基于OpenCV的文本定位

最新推荐文章于 2023-03-02 16:25:30 发布

Suninsky！

最新推荐文章于 2023-03-02 16:25:30 发布

阅读量2.8k

点赞数

本文链接：https://blog.csdn.net/suninsky_plate/article/details/79332963

版权

针对发票识别之类的自然环境中的文本识别，对结果影响最大的应该是文本定位的问题。

下面的实现参考了 BoyTNT 的《基于OpenCV进行文本分块切割》（C# 版本）。

本人写了一份 C++ 的代码，仅供参考。

因为有一些干扰，所以文本定位并不是很完美，可以通过二值化的方法去掉一些干扰，以后再改进。

代码

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

using namespace cv;
using namespace std;

/**
 * Splits a binarized image into horizontal bands (text rows) using a
 * horizontal projection profile.
 *
 * For every image row the number of pixels whose first-channel value equals
 * 255 is counted. A band opens at the first row with more than 10 such pixels
 * and closes at the first later row (more than 5 rows after the opening row)
 * with fewer than 10 such pixels. Bands of 10 rows or fewer are discarded
 * (e.g. separator lines). A band that is still open at the bottom edge of the
 * image is closed there instead of being dropped.
 *
 * Side effect: displays the input in a HighGUI window named "d".
 *
 * @param gray Binarized image; foreground pixels are expected to be 255.
 * @return One rectangle per detected band, spanning the full image width and
 *         padded by one row above and below, clamped to the image bounds.
 *         Empty if the image is empty.
 */
vector<CvRect> GetRowRects(Mat gray)
{
    vector<CvRect> rows;
    if (gray.empty())
        return rows;

    const int height = gray.rows;
    const int width = gray.cols;

    // Per-row count of foreground (== 255) pixels. A vector replaces the
    // original new[]/delete pair, which released the array with scalar delete.
    vector<int> projection(height, 0);

    imshow("d", gray);
    IplImage src = gray;
    for (int y = 0; y < height; ++y)
    {
        for (int x = 0; x < width; ++x)
        {
            const CvScalar s = cvGet2D(&src, y, x);
            if (static_cast<int>(s.val[0]) == 255)
                ++projection[y];
        }
    }

    bool inLine = false;
    int start = 0;

    // Iterate one step past the last row and treat that virtual row as blank,
    // so that a band touching the bottom edge is still closed and reported.
    for (int i = 0; i <= height; ++i)
    {
        const int count = (i < height) ? projection[i] : 0;

        if (!inLine && count > 10)
        {
            // Transition from blank area into a text band.
            inLine = true;
            start = i;
        }
        else if (inLine && (i - start > 5) && count < 10)
        {
            // Transition from a text band back into blank area.
            inLine = false;

            // Ignore bands that are too thin to be text, e.g. separator lines.
            if (i - start > 10)
            {
                // Pad by one row on each side, but never leave the image:
                // start == 0 would otherwise yield y == -1.
                const int top = std::max(start - 1, 0);
                const int bottom = std::min(i + 1, height);
                rows.push_back(cvRect(0, top, width, bottom - top));
            }
        }
    }
    return rows;
}

/**
 * Splits one text row into horizontal blocks using a vertical projection
 * profile.
 *
 * The row is first dilated horizontally with a 10x1 rectangular kernel.
 * Then, for every column, the number of pixels whose
 * first-channel value equals 255 is counted. A block opens at the first
 * column with at least 2 such pixels and closes at the first later column
 * (more than 10 columns after the opening column) with fewer than 2.
 * A block whose left edge is within 5 columns of the previous block's right
 * edge is merged into the previous block. A block still open at the right
 * edge of the row is closed there instead of being dropped.
 *
 * The dilation is written to a local image, so the pixels shared with the
 * caller's Mat are not modified.
 *
 * Side effect: displays the dilated row in a HighGUI window named "fushi".
 *
 * @param gray Binarized image of a single row; foreground expected to be 255.
 * @param rowY Y offset of this row in the full image; stored as the y
 *             coordinate of every returned rectangle.
 * @return Block rectangles with x relative to `gray`, y == rowY and the full
 *         height of `gray`. Empty if `gray` is empty.
 */
vector<CvRect> GetBlockRects(Mat gray, int rowY)
{
    vector<CvRect> blocks;
    if (gray.empty())
        return blocks;

    const int height = gray.rows;
    const int width = gray.cols;

    // Horizontal dilation (not erosion) with a 10x1 kernel.
    Mat element = getStructuringElement(MORPH_RECT, Size(10, 1));
    Mat dilated;
    dilate(gray, dilated, element);
    imshow("fushi", dilated);

    // Per-column count of foreground (== 255) pixels. A vector replaces the
    // original new[] allocation, which was never freed.
    vector<int> projection(width, 0);
    IplImage src = dilated;
    for (int x = 0; x < width; ++x)
    {
        for (int y = 0; y < height; ++y)
        {
            const CvScalar s = cvGet2D(&src, y, x);
            if (s.val[0] == 255)
                ++projection[x];
        }
    }

    bool inBlock = false;
    int start = 0;

    // Iterate one step past the last column and treat that virtual column as
    // blank, so that a block touching the right edge is still reported.
    for (int i = 0; i <= width; ++i)
    {
        const int count = (i < width) ? projection[i] : 0;

        if (!inBlock && count >= 2)
        {
            // Transition from blank area into a text block.
            inBlock = true;
            start = i;
        }
        else if (inBlock && (i - start > 10) && count < 2)
        {
            // Transition from a text block back into blank area.
            inBlock = false;

            if (!blocks.empty() &&
                start - (blocks.back().x + blocks.back().width) <= 5)
            {
                // Too close to the previous block: treat both as one block
                // by extending the previous one up to the current column.
                CvRect &last = blocks.back();
                last.width = i - last.x;
            }
            else
            {
                // The input is only one row of the page, so the y position in
                // the full image is supplied by the caller as rowY.
                blocks.push_back(cvRect(start, rowY, i - start, height));
            }
        }
    }
    return blocks;
}

int main(int argc, char* argv[])
{
    string path = "D:/Project/FapiaoSystem/test/fapiao6.jpg";
    Mat image = imread(path);
    imshow("原图", image);
    //读入原图

    //压缩
    if (image.rows*image.cols>1000000)
    {
        int t;
        if (image.cols>image.rows)t = 1000;
        else t = 800;
        resize(image, image, Size(t, image.rows*1.0 / image.cols * t), 0, 0, CV_INTER_LINEAR);
    }

    Mat gray, data;
    cvtColor(image, gray, CV_BGR2GRAY);
    data = image.clone();

    int blockSize = 25;
    int constValue = 10;
    //threshold(gray, gray, 0, 255, THRESH_BINARY_INV | CV_THRESH_OTSU);
    adaptiveThreshold(gray, gray, 255, CV_ADAPTIVE_THRESH_MEAN_C, CV_THRESH_BINARY_INV, blockSize, constValue);
    imshow("1", gray);
    vector<CvRect>rows = GetRowRects(gray);
    cout << rows.size();

    vector<CvRect>items;
    for each(CvRect row in rows)
    {
        vector<CvRect> cols = GetBlockRects(gray(row), row.y);
        items.insert(items.end(), cols.begin(), cols.end());   //把cols push到items
    }
    for each (CvRect rect in items)
    {

        rectangle(image, rect,(255,0,0));
    }
    imshow("result", image);
    waitKey(0);
}