文字识别

1、video.h:用摄像头拍摄视频,并将视频逐帧保存为图像;

#include "opencv2/opencv.hpp"
#include "highgui.h"
#include <iostream>
#include <vector>
#include <string>
using namespace cv;
using namespace std;

// Opens a capture source: a camera when no extra command-line argument is
// given (argc == 1), otherwise the source named by argv[1].
VideoCapture CreateCapture(int argc, char** argv);
// Grabs frames from `capture` until a blank frame or a key press, showing and
// saving each frame, and returns the collected frames.
vector<Mat> ReadPictureFromViedeo(VideoCapture& capture);

// Opens the video source selected by the command line.
//
// argc/argv: the program's command-line arguments. With no extra argument
//            (argc == 1) camera index 1 is opened; otherwise argv[1] is
//            opened as the source name.
// Returns the capture object. On failure an error is written to cerr and the
// returned capture is NOT opened, so callers must check isOpened().
VideoCapture CreateCapture(int argc, char* argv[])
{
    VideoCapture capture;

    if (argc == 1)
    {
        capture.open(1);
        if (!capture.isOpened())
        {
            cerr << "ERROR ! Unable to open camera\n";
        }
    }
    else
    {
        capture.open(argv[1]);
        if (!capture.isOpened())
        {
            cerr << "ERROR ! The video does not exist\n";
        }
    }

    // The failure paths previously did `return -1;`, which either does not
    // compile (explicit constructor) or implicitly builds VideoCapture(-1)
    // and tries to open some other device. Returning the unopened object
    // reports the failure without that side effect.
    return capture;
}

// Reads frames from `capture` until a blank frame is grabbed or a key is
// pressed. Every frame is displayed, written to disk as image_<n>.png and
// appended to the returned vector.
//
// capture: an already opened capture source.
// Returns the grabbed frames in capture order (each one an independent copy).
vector<Mat> ReadPictureFromViedeo(VideoCapture& capture)
{
    // One name for both creating and drawing into the window; the two calls
    // previously used different spellings, leaving one window empty.
    const char* const windowName = "video";
    const string outputDir = "/home/znkz/gzh/c++/test/build/Data/";

    cvNamedWindow(windowName);
    vector<Mat> image;
    Mat frame;

    int number = 0;

    while (true)
    {
        capture >> frame;
        if (frame.empty())
        {
            cerr << "ERROR ! blank frame grabbed\n";
            break;
        }
        imshow(windowName, frame);

        stringstream ss;
        ss << "image_" << number << ".png";
        const string ImageName = outputDir + ss.str();
        imwrite(ImageName, frame);

        // Mat copies are shallow and the capture may reuse the same buffer
        // for the next grab, so store a deep copy of each frame.
        image.push_back(frame.clone());
        number++;

        // wait for a key with timeout long enough to show images
        if (waitKey(5) >= 0)
            break;
    }
    return image;
}

2、特征描述

// Computes the HOG descriptor of `img` and returns it as a single-row
// CV_32F matrix (1 x descriptor length).
Mat hog(Mat& img)
{
	// Arguments in order: 32x32 window, 8x8 block, 4x4 block stride,
	// 4x4 cell, 4 orientation bins.
	HOGDescriptor extractor(Size(32, 32), Size(8, 8), Size(4, 4), Size(4, 4), 4);

	vector<float> values;                        // HOG descriptor vector
	extractor.compute(img, values, Size(4, 4));  // window stride is (4,4)

	const int dim = values.size();
	Mat row = Mat::zeros(1, dim, CV_32F);
	float* out = row.ptr<float>(0);
	for (int k = 0; k < dim; ++k)
	{
		out[k] = values[k];
	}
	return row;
}

3、提取训练数据

// Builds HOG training/test samples from a sheet image made of 20x20 tiles.
//
// filePath:    path of the sheet image.
// trainData:   receives the first (up to 5000) feature rows, CV_32F.
// trainLabels: receives the labels of those rows.
// testData:    receives the remaining feature rows (may be empty).
// testLabels:  receives the labels of the remaining rows.
//
// The label of a tile is its tile-row index divided by 5, i.e. the sheet is
// assumed to hold five tile rows per class - TODO confirm against the image.
// If the image cannot be read or yields no tile, an error is written to cerr
// and the output matrices are left untouched.
void getTrainData(string filePath, Mat& trainData, Mat& trainLabels, Mat& testData, Mat& testLabels)
{
    Mat image = imread(filePath);
    if (image.empty())
    {
        cerr << "ERROR ! Unable to read training image " << filePath << "\n";
        return;
    }

    Mat gray;
    cvtColor(image, gray, CV_BGR2GRAY);
    threshold(gray, gray, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);

    const int b = 20;              // tile edge length in pixels
    const int m = image.rows / b;  // number of tile rows
    const int n = image.cols / b;  // number of tile columns
    Mat data, labels;

    // Walk column by column, then row by row inside the column. Going row
    // first would put only the first classes into the leading (training)
    // part of the data. Shuffling the samples would spread them more evenly.
    for (int i = 0; i < n; i++)
    {
        const int leftTopCol = i * b;
        for (int j = 0; j < m; j++)
        {
            const int leftTopRow = j * b;  // row of the tile's top-left corner

            // Copy the tile out of the binarised sheet.
            Mat tempPixel;
            gray(Range(leftTopRow, leftTopRow + b), Range(leftTopCol, leftTopCol + b)).copyTo(tempPixel);

            // Enlarge the tile 4x before extracting the HOG feature.
            Mat hog_train;
            resize(tempPixel, hog_train, Size(20 * 4, 20 * 4), 0, 0);
            Mat featureMat = hog(hog_train);

            data.push_back(featureMat);
            labels.push_back((int)j / 5);
        }
    }

    if (data.empty())
    {
        cerr << "ERROR ! No training tiles found in " << filePath << "\n";
        return;
    }

    // Split into training and test data.
    data.convertTo(data, CV_32F);  // format required by TrainData::create
    const int sampleNum = data.rows;
    // Never ask for more training rows than exist; a fixed 5000 made the
    // Range below throw on smaller sheets.
    const int trainNum = sampleNum < 5000 ? sampleNum : 5000;

    trainData = data(Range(0, trainNum), Range::all());
    trainLabels = labels(Range(0, trainNum), Range::all());

    testData = data(Range(trainNum, sampleNum), Range::all());
    testLabels = labels(Range(trainNum, sampleNum), Range::all());
}

3、文字分割

#include <algorithm>
#include <cstdio>
#include <iostream>
#include <vector>

#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
using namespace cv;
using namespace std;

// Projection modes accepted by GetTextProjection and draw_projection.
// V_PROJECT: vertical projection, one counter per image column.
#define V_PROJECT 1
// H_PROJECT: horizontal projection, one counter per image row.
#define H_PROJECT 2

// Index range of one run found in a projection profile.
struct char_range_t
{
	int begin;  // index where the run starts
	int end;    // index where the run stops
};

// Displays a projection profile as a black/white bar chart.
//
// pos:  one count per image row (H_PROJECT) or per image column (V_PROJECT).
// mode: H_PROJECT draws one horizontal bar per row, growing from the left;
//       V_PROJECT draws one vertical bar per column, growing from the bottom.
// Nothing is shown when `pos` is empty or its largest value is not positive,
// because the chart would have a zero-sized dimension.
void draw_projection(vector<int>& pos, int mode)
{
	if (pos.empty())
	{
		return;  // max_element would return end(), which must not be dereferenced
	}
	const int peak = *std::max_element(pos.begin(), pos.end());  // largest count
	if (peak <= 0)
	{
		return;  // a zero-sized image cannot be shown
	}

	if (mode == H_PROJECT)
	{
		const int height = static_cast<int>(pos.size());
		const int width = peak;
		Mat project = Mat::zeros(height, width, CV_8UC1);
		// Paint the first pos[i] pixels of row i white.
		for (int i = 0; i < project.rows; i++)
		{
			for (int j = 0; j < pos[i]; j++)
			{
				project.at<uchar>(i, j) = 255;
			}
		}
		cvNamedWindow("horizational projection", 0);
		imshow("horizational projection", project);
	}
	else if (mode == V_PROJECT)
	{
		const int height = peak;
		const int width = static_cast<int>(pos.size());
		Mat project = Mat::zeros(height, width, CV_8UC1);
		// Paint the bottom pos[i] pixels of column i white.
		for (int i = 0; i < project.cols; i++)
		{
			for (int j = project.rows - 1; j >= project.rows - pos[i]; j--)
			{
				project.at<uchar>(j, i) = 255;
			}
		}

		imshow("vertical projection", project);
	}
}

//获取文本的投影用于分割字符(垂直,水平)

// Accumulates the projection of the zero-valued pixels of `src`, used to
// split text into lines and characters, and displays the resulting profile.
//
// src:  image read as one uchar per pixel - assumes CV_8UC1, TODO confirm.
// pos:  counters that are incremented in place: one per column for
//       V_PROJECT, one per row for H_PROJECT. Existing values are kept, and
//       the vector is grown with zeros if it is shorter than the image
//       dimension it has to cover.
// mode: V_PROJECT or H_PROJECT; any other value leaves `pos` unchanged.
// Always returns 0.
int GetTextProjection(Mat &src, vector<int>& pos, int mode)
{
	if (mode == V_PROJECT)
	{
		// Guard against a caller-supplied vector that is too short.
		if (pos.size() < static_cast<vector<int>::size_type>(src.cols))
		{
			pos.resize(src.cols, 0);
		}
		// Count, for every column, the pixels whose value is 0.
		for (int i = 0; i < src.rows; i++)
		{
			const uchar* p = src.ptr<uchar>(i);
			for (int j = 0; j < src.cols; j++)
			{
				if (p[j] == 0)
				{
					pos[j]++;
				}
			}
		}

		draw_projection(pos, V_PROJECT);
	}
	else if (mode == H_PROJECT)
	{
		if (pos.size() < static_cast<vector<int>::size_type>(src.rows))
		{
			pos.resize(src.rows, 0);
		}
		// Count, for every row, the pixels whose value is 0. Rows are walked
		// one at a time so memory is read in storage order.
		for (int i = 0; i < src.rows; i++)
		{
			const uchar* p = src.ptr<uchar>(i);
			int zeros = 0;
			for (int j = 0; j < src.cols; j++)
			{
				if (p[j] == 0)
				{
					zeros++;
				}
			}
			pos[i] += zeros;
		}

		draw_projection(pos, H_PROJECT);
	}

	return 0;
}

//获取每个分割字符的范围,min_thresh:波峰的最小幅度,min_range:两个波峰的最小间隔
// Finds the index ranges of the runs ("peaks") in a projection profile.
//
// vertical_pos: the projection profile.
// peek_range:   receives one {begin, end} entry per accepted run, appended
//               in ascending order. `begin` is the first index whose value
//               exceeds min_thresh; `end` is the index at which the run is
//               closed (one past the last element for a run that reaches
//               the end of the profile).
// min_thresh:   a run opens on a value > min_thresh and can close on a
//               value < min_thresh; values equal to it do neither.
// min_range:    minimum run length (end - begin). A run that is still too
//               short when the profile drops stays open, so it merges with
//               whatever follows.
// Always returns 0.
int GetPeekRange(vector<int> &vertical_pos, vector<char_range_t> &peek_range, int min_thresh = 2, int min_range = 10)
{
	// Index 0 used to double as the "no run open" marker, which shifted a
	// run starting at index 0 by one element. Use an impossible index.
	const int kNoRun = -1;
	const int count = static_cast<int>(vertical_pos.size());

	int begin = kNoRun;
	for (int i = 0; i < count; i++)
	{
		const int value = vertical_pos[i];

		if (begin == kNoRun)
		{
			if (value > min_thresh)
			{
				begin = i;
			}
			continue;
		}

		if (value < min_thresh && i - begin >= min_range)
		{
			char_range_t tmp;
			tmp.begin = begin;
			tmp.end = i;
			peek_range.push_back(tmp);
			begin = kNoRun;
		}
	}

	// A run that is still open at the end of the profile was silently
	// dropped before; emit it if it is long enough.
	if (begin != kNoRun && count - begin >= min_range)
	{
		char_range_t tmp;
		tmp.begin = begin;
		tmp.end = count;
		peek_range.push_back(tmp);
	}

	return 0;
}




inline void save_cut(const Mat& img, int id)
{
	char name[128] = { 0 };
	sprintf(name, "./save_cut/%d.jpg", id);
	imwrite(name, img);
}

//切割字符
// Cuts the characters of one text-line image apart.
//
// img:          image of a single text line; each crop spans its full height.
// v_peek_range: column ranges of the characters in `img`.
// h_peek_range: not used by this function; kept for interface compatibility.
// chars_set:    receives a deep copy of every cropped character, appended in
//               the order of v_peek_range.
// Every crop is also written to disk through save_cut, numbered by a counter
// that keeps increasing across calls. Ranges that leave no pixels inside the
// image are skipped. Always returns 0.
int CutChar(Mat &img, const vector<char_range_t>& v_peek_range, const vector<char_range_t>& h_peek_range, vector<Mat>& chars_set)
{
	(void)h_peek_range;

	static int count = 0;  // running file number shared by all calls
	const int range_count = static_cast<int>(v_peek_range.size());

	for (int i = 0; i < range_count; i++)
	{
		const int char_gap = v_peek_range[i].end - v_peek_range[i].begin;

		// Start two pixels left of the run, but not outside the image.
		const int x = v_peek_range[i].begin - 2 > 0 ? v_peek_range[i].begin - 2 : 0;
		// Characters are roughly square, so the width is limited to the
		// line height.
		int width = char_gap + 4 <= img.rows ? char_gap : img.rows;
		// Keep the rectangle inside the image; cropping past the right edge
		// makes img(r) throw.
		if (x + width > img.cols)
		{
			width = img.cols - x;
		}
		if (width <= 0)
		{
			continue;
		}

		const Rect r(x, 0, width, img.rows);
		Mat single_char = img(r).clone();
		chars_set.push_back(single_char);
		save_cut(single_char, count);
		count++;
	}

	return 0;
}

// Returns a deep copy of the full-width band of `src` that starts at row
// `begin` and is (end - begin) rows high.
Mat cut_one_line(const Mat& src,int begin,int end)
{
	const Rect band(0, begin, src.cols, end - begin);
	return src(band).clone();
}


// Splits a text image into single-character images.
//
// img: input image; it is binarised IN PLACE with Otsu thresholding, so the
//      caller's matrix is modified. For white-on-black input the threshold
//      type would have to be CV_THRESH_BINARY_INV instead.
// Returns the characters line by line, in the order CutChar produces them.
// An empty input yields an empty result.
vector<Mat> CutSingleChar(Mat& img)
{
	vector<Mat> chars_set;
	if (img.empty())
	{
		return chars_set;
	}

	threshold(img, img, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);

	// Horizontal projection: locate the text lines.
	vector<int> horizion_pos(img.rows, 0);
	vector<char_range_t> h_peek_range;
	GetTextProjection(img, horizion_pos, H_PROJECT);
	GetPeekRange(horizion_pos, h_peek_range, 2, 10);

	// Cut out each text line and split it into characters straight away;
	// no intermediate list of lines is needed.
	const int line_count = static_cast<int>(h_peek_range.size());
	for (int i = 0; i < line_count; i++)
	{
		Mat line = cut_one_line(img, h_peek_range[i].begin, h_peek_range[i].end);
		imshow("raw line", line);

		// Vertical projection of the line: locate its characters.
		vector<int> vertical_pos(line.cols, 0);
		vector<char_range_t> v_peek_range;
		GetTextProjection(line, vertical_pos, V_PROJECT);
		GetPeekRange(vertical_pos, v_peek_range);
		CutChar(line, v_peek_range, h_peek_range, chars_set);
	}

	return chars_set;
}

4、缩放成给定大小20*20

// Normalise char_test to ocr_width x ocr_height.
// char_test, ocr_width, ocr_height and ratio are declared outside this snippet.
// Wider than tall: stretch straight to the target size (aspect ratio is not kept).
if (char_test.cols > char_test.rows)
	    {
		resize(char_test, char_test, Size(ocr_width, ocr_height), 0, 0);
	    }
	    else
	    {
		// Scale to the target height while keeping the aspect ratio.
		ratio = 1.0 * ocr_height /char_test.rows;
		resize(char_test, char_test, Size((int)(char_test.cols * ratio), ocr_height), 0, 0);
		// Pad left and right equally with a constant border of value 255
		// (presumably white on a single-channel image - TODO confirm).
		copyMakeBorder(char_test, char_test, 0, 0, (int)(ocr_width - char_test.cols) / 2, (int)(ocr_width - char_test.cols) / 2, BORDER_CONSTANT, 255);
		// Integer division above can leave the width short of ocr_width;
		// the final resize forces the exact target size.
		resize(char_test, char_test, Size(ocr_width, ocr_height), 0, 0);
	    }

5、提取Hog特征:

// Extract the HOG feature of one test character.
// char_test, ocr_width and ocr_height are declared outside this snippet.
Mat featureMat;
	Mat hog_test;
	     
	     Mat tempPixel;
	     // NOTE(review): tempProject is not used in this snippet.
	     Mat tempProject;
	    threshold(char_test, tempPixel, 0, 255, CV_THRESH_BINARY_INV | CV_THRESH_OTSU); // invert to white-on-black, same as the templates
	    
	    // Enlarge to 4x the OCR size before computing the descriptor.
	    resize(tempPixel, hog_test, Size(ocr_width * 4, ocr_height * 4), 0, 0);
	     featureMat = hog(hog_test);

6、放进训练好的KNN模型预测:

	     // Convert the feature row to 32-bit float before prediction.
	     featureMat.convertTo(featureMat, CV_32F);
	    // KnnPredict and model are defined outside this snippet; the returned
	    // value is presumably the predicted class label - TODO confirm.
	    float response = KnnPredict(featureMat, model);

7、测试结果如下: 

测试样本

 分割行

水平投影

 垂直投影

字符分割结果

0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
2 2 2 2 2 2 2 2 2 0 9 2 1 2 2 2 
3 3 3 3 3 3 3 3 3 5 0 3 3 3 3 5 
4 4 9 4 5 4 4 4 4 4 4 4 9 4 4 4 
5 5 5 0 3 5 5 6 5 1 3 5 5 5 5 5 
6 6 6 6 6 5 6 6 6 6 6 6 6 6 6 6 
4 7 7 7 7 7 7 7 7 0 1 1 2 1 1 7 
5 8 8 0 8 8 8 8 8 1 8 8 7 8 8 8 
8 7 4 4 9 9 4 9 4 6 4 4 6 9 5 9 
accuracy: test = 71.2%
*** 正常退出 ***

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值