针对发票识别之类的自然环境中文本识别,对结果影响最大的应该是文本定位的问题。
下面是本人参考 BoyTNT 的《基于OpenCV进行文本分块切割》(C#)一文写的一份 C++ 代码,仅供参考。
因为有一些干扰,所以文本定位并不是很完美,可以通过二值化的方法去掉一些干扰,以后再改进。
代码
#include <opencv2\opencv.hpp>
#include <vector>
#include <iostream>
using namespace cv;
using namespace std;
// Splits a binarized image into horizontal text rows using a horizontal
// projection profile (the number of white pixels in each image row).
//
// gray:    8-bit image; only channel 0 is inspected, and a pixel counts as
//          foreground when its value is exactly 255.
// returns: one full-width rectangle per detected row, top to bottom. Each
//          rectangle is padded by one pixel above and below the detected
//          span and always lies inside the image.
//
// Side effect: shows the input in a HighGUI window named "d".
vector<CvRect> GetRowRects(Mat gray)
{
    vector<CvRect> rows;
    if (gray.empty())
        return rows;

    // Pixels are read as 8-bit samples below.
    CV_Assert(gray.depth() == CV_8U);

    // An image row with more than kEnterCount white pixels opens a text row;
    // one with fewer than kLeaveCount closes it again.
    const int kEnterCount = 10;
    const int kLeaveCount = 10;
    // A text row cannot be closed before it is taller than this ...
    const int kMinSpanBeforeLeave = 5;
    // ... and is discarded unless it is taller than this (e.g. ruling lines).
    const int kMinRowHeight = 10;

    const int height = gray.rows;
    const int width = gray.cols;
    const int channels = gray.channels();

    imshow("d", gray);

    // projection[y] = number of white pixels in image row y.
    // A vector releases its storage automatically on every exit path.
    vector<int> projection(height, 0);
    for (int y = 0; y < height; ++y)
    {
        const uchar *line = gray.ptr<uchar>(y);
        for (int x = 0; x < width; ++x)
        {
            if (line[x * channels] == 255)
                projection[y]++;
        }
    }

    bool inLine = false;
    int start = 0;
    for (int i = 0; i < height; i++)
    {
        if (!inLine && projection[i] > kEnterCount)
        {
            // Transition from blank area into a text row: remember where.
            inLine = true;
            start = i;
        }
        else if ((i - start > kMinSpanBeforeLeave) && projection[i] < kLeaveCount && inLine)
        {
            // Transition from a text row back into blank area.
            inLine = false;
            if (i - start > kMinRowHeight)
            {
                // Pad by one pixel on each side, but never above row 0:
                // a negative y would make the caller's ROI extraction fail.
                const int top = start > 0 ? start - 1 : 0;
                const int bottom = i + 1; // exclusive; i < height so it fits
                rows.push_back(cvRect(0, top, width, bottom - top));
            }
        }
    }

    // A text row that is still open at the bottom edge would otherwise be lost.
    if (inLine && height - start > kMinRowHeight)
    {
        const int top = start > 0 ? start - 1 : 0;
        rows.push_back(cvRect(0, top, width, height - top));
    }
    return rows;
}
vector<CvRect> GetBlockRects(Mat gray, int rowY)
{
IplImage src = gray;
vector<CvRect> blocks;
int height = gray.rows;
int width = gray.cols;
//用于存储投影值
int *projection = new int[width]();
//横向膨胀
Mat element = getStructuringElement(MORPH_RECT, Size(10, 1));
//进行腐蚀操作
dilate(gray, gray, element);
imshow("fushi", gray);
for (int x = 0; x < width; ++x)
{
for (int y = 0; y < height; ++y)
{
Scalar s = cvGet2D(&src, y, x);
if (s.val[0] == 255)
projection[x]++;
}
}
bool inBlock = false;
int start = 0;
//开始根据投影值识别分割点
for (int i = 0; i < width; ++i)
{
if (!inBlock && projection[i] >= 2)
{
//由空白区域进入字符区域了
inBlock = true;
start = i;
}
else if ((i - start > 10) && inBlock && projection[i] < 2)
{
//由字符区域进入空白区域了
inBlock = false;
//记录位置,注意由于传入的是source只是一行,因此最终的位置信息要+rowY
if (blocks.size() > 0)
{
//跟上一个比一下,如果距离过近,认为是同一个文本块,合并
CvRect last = blocks[blocks.size() - 1];
if (start - last.x - last.width <= 5)
{
blocks.pop_back();
CvRect rect =cvRect(last.x, rowY, i - last.x, height);
blocks.push_back(rect);
}
else
{
CvRect rect = cvRect(start, rowY, i - start, height);
blocks.push_back(rect);
}
}
else
{
CvRect rect = cvRect(start, rowY, i - start, height);
blocks.push_back(rect);
}
}
}
return blocks;
}
int main(int argc, char* argv[])
{
string path = "D:/Project/FapiaoSystem/test/fapiao6.jpg";
Mat image = imread(path);
imshow("原图", image);
//读入原图
//压缩
if (image.rows*image.cols>1000000)
{
int t;
if (image.cols>image.rows)t = 1000;
else t = 800;
resize(image, image, Size(t, image.rows*1.0 / image.cols * t), 0, 0, CV_INTER_LINEAR);
}
Mat gray, data;
cvtColor(image, gray, CV_BGR2GRAY);
data = image.clone();
int blockSize = 25;
int constValue = 10;
//threshold(gray, gray, 0, 255, THRESH_BINARY_INV | CV_THRESH_OTSU);
adaptiveThreshold(gray, gray, 255, CV_ADAPTIVE_THRESH_MEAN_C, CV_THRESH_BINARY_INV, blockSize, constValue);
imshow("1", gray);
vector<CvRect>rows = GetRowRects(gray);
cout << rows.size();
vector<CvRect>items;
for each(CvRect row in rows)
{
vector<CvRect> cols = GetBlockRects(gray(row), row.y);
items.insert(items.end(), cols.begin(), cols.end()); //把cols push到items
}
for each (CvRect rect in items)
{
rectangle(image, rect,(255,0,0));
}
imshow("result", image);
waitKey(0);
}