1、video.h用摄像头拍摄数据,并将视频数据转为图像;
#include "opencv2/opencv.hpp"
#include "highgui.h"
#include <iostream>
#include <vector>
#include <string>
using namespace cv;
using namespace std;
// Opens a capture source selected from the command-line arguments.
VideoCapture CreateCapture(int argc, char** argv);
// Reads frames from `capture` and returns them as a vector of images.
vector<Mat> ReadPictureFromViedeo(VideoCapture& capture);
/**
 * Opens the video source selected by the command line.
 *
 * When no extra argument is given, camera index 1 is opened; otherwise
 * argv[1] is opened as the video source.
 *
 * @param argc Argument count as received by main().
 * @param argv Argument vector as received by main().
 * @return The capture object. On failure an error is written to stderr and
 *         the (unopened) capture is returned, so callers must check
 *         isOpened() before reading frames.
 */
VideoCapture CreateCapture(int argc, char* argv[])
{
    VideoCapture capture;
    if (argc < 2)
    {
        capture.open(1);
        if (!capture.isOpened())
        {
            cerr << "ERROR ! Unable to open camera\n";
        }
    }
    else
    {
        capture.open(argv[1]);
        if (!capture.isOpened())
        {
            cerr << "ERROR ! The video does not exist\n";
        }
    }
    // Always hand back the capture itself. Returning an integer here would
    // not be an error code: it either fails to compile or is converted into
    // a brand-new VideoCapture built from that integer.
    return capture;
}
/**
 * Grabs frames from `capture` until the stream ends or a key is pressed.
 *
 * Every frame is displayed, written to disk as image_<n>.png in the output
 * directory below, and appended to the returned vector.
 *
 * @param capture An already opened capture source.
 * @return All frames grabbed, in order. Each element owns its own pixels.
 */
vector<Mat> ReadPictureFromViedeo(VideoCapture& capture)
{
    // One name for both creating and updating the window; two different
    // spellings would open two separate windows.
    const string windowName = "video";
    const string outputDir = "/home/znkz/gzh/c++/test/build/Data/";

    namedWindow(windowName);
    vector<Mat> image;
    Mat frame;
    int number = 0;
    while (true)
    {
        capture >> frame;
        if (frame.empty())
        {
            cerr << "ERROR ! blank frame grabbed\n";
            break;
        }
        imshow(windowName, frame);

        stringstream ss;
        ss << outputDir << "image_" << number << ".png";
        imwrite(ss.str(), frame);

        // Mat copies are shallow: without clone() every stored element could
        // share the buffer that the next grab overwrites.
        image.push_back(frame.clone());
        number++;

        // Wait briefly so the window can repaint; any key press stops capture.
        if (waitKey(5) >= 0)
            break;
    }
    return image;
}
2、特征描述
/**
 * Computes the HOG descriptor of `img` and returns it as a single row.
 *
 * Descriptor layout: 32x32 window, 8x8 block, 4x4 block stride, 4x4 cell,
 * 4 orientation bins. The detection window is moved with a 4x4 stride.
 *
 * @param img Input image.
 * @return A 1 x N CV_32F matrix holding the N descriptor values.
 */
Mat hog(Mat& img)
{
    HOGDescriptor descriptor(Size(32, 32), Size(8, 8), Size(4, 4), Size(4, 4), 4);

    vector<float> values; // raw HOG descriptor values
    descriptor.compute(img, values, Size(4, 4)); // window stride is (4, 4)

    const int dim = values.size();
    Mat row = Mat::zeros(1, dim, CV_32F);
    float* out = row.ptr<float>(0);
    for (int k = 0; k < dim; ++k)
    {
        out[k] = values[k];
    }
    return row;
}
3、提取训练数据
/**
 * Builds HOG training/test sets from a sheet image made of 20x20 cells.
 *
 * The sheet is binarised with Otsu's threshold, cut into 20x20 cells, each
 * cell is enlarged 4x and described with hog(). Cells are visited column by
 * column so that consecutive samples cycle through all labels; the label of
 * a cell is its row index divided by 5.
 *
 * @param filePath    Path of the sheet image.
 * @param trainData   Receives the first (at most 5000) feature rows, CV_32F.
 * @param trainLabels Receives the labels matching trainData.
 * @param testData    Receives the remaining feature rows (may be empty).
 * @param testLabels  Receives the labels matching testData.
 * @throws cv::Exception if the image cannot be read or holds no full cell.
 */
void getTrainData(string filePath, Mat& trainData, Mat& trainLabels, Mat& testData, Mat& testLabels)
{
    const int cellSize = 20;          // side length of one sample cell, in pixels
    const int hogScale = 4;           // cells are enlarged by this factor before HOG
    const int maxTrainSamples = 5000; // upper bound on the training split

    Mat image = imread(filePath);
    CV_Assert(!image.empty()); // fail with a clear message instead of inside cvtColor

    Mat gray;
    cvtColor(image, gray, CV_BGR2GRAY);
    threshold(gray, gray, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);

    const int cellRows = gray.rows / cellSize;
    const int cellCols = gray.cols / cellSize;
    CV_Assert(cellRows > 0 && cellCols > 0);

    Mat data, labels;
    // Column-major traversal: walking row by row would put all samples of the
    // low labels first, leaving the training split without the later labels.
    for (int col = 0; col < cellCols; col++)
    {
        const int left = col * cellSize;
        for (int row = 0; row < cellRows; row++)
        {
            const int top = row * cellSize;
            Mat cell = gray(Range(top, top + cellSize), Range(left, left + cellSize));

            Mat enlarged;
            resize(cell, enlarged, Size(cellSize * hogScale, cellSize * hogScale), 0, 0);

            data.push_back(hog(enlarged));
            labels.push_back(row / 5);
        }
    }

    data.convertTo(data, CV_32F); // TrainData::create expects CV_32F samples

    // Clamp the split so a sheet with fewer samples than the nominal training
    // size does not produce an out-of-range row selection.
    const int sampleNum = data.rows;
    const int trainNum = sampleNum < maxTrainSamples ? sampleNum : maxTrainSamples;

    trainData = data(Range(0, trainNum), Range::all());
    trainLabels = labels(Range(0, trainNum), Range::all());
    testData = data(Range(trainNum, sampleNum), Range::all());
    testLabels = labels(Range(trainNum, sampleNum), Range::all());
}
3、文字分割
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include <iostream>
#include <algorithm>
#include <vector>
using namespace cv;
using namespace std;
// Projection mode selectors passed as the `mode` argument below.
// V_PROJECT: vertical projection (one counter per image column).
#define V_PROJECT 1
// H_PROJECT: horizontal projection (one counter per image row).
#define H_PROJECT 2
// Index range of one run found in a projection profile.
struct char_range_t
{
    int begin; // index where the run starts
    int end;   // index where the run stops
};
/**
 * Displays a projection profile as a white-on-black bar chart.
 *
 * H_PROJECT draws one horizontal bar per entry (growing from the left edge),
 * V_PROJECT draws one vertical bar per entry (growing from the bottom edge).
 * Any other mode draws nothing.
 *
 * @param pos  Projection counts, one per row (H_PROJECT) or column (V_PROJECT).
 * @param mode H_PROJECT or V_PROJECT.
 */
void draw_projection(vector<int>& pos, int mode)
{
    // Nothing to draw: an empty profile has no maximum, and an all-zero one
    // would give a zero-sized image that cannot be shown.
    if (pos.empty())
        return;
    const int peak = *std::max_element(pos.begin(), pos.end());
    if (peak <= 0)
        return;

    const int bins = static_cast<int>(pos.size());
    if (mode == H_PROJECT)
    {
        Mat project = Mat::zeros(bins, peak, CV_8UC1);
        for (int row = 0; row < bins; row++)
        {
            if (pos[row] > 0)
                project.row(row).colRange(0, pos[row]).setTo(255);
        }
        namedWindow("horizational projection", WINDOW_NORMAL);
        imshow("horizational projection", project);
    }
    else if (mode == V_PROJECT)
    {
        Mat project = Mat::zeros(peak, bins, CV_8UC1);
        for (int col = 0; col < bins; col++)
        {
            // Bars are anchored to the bottom row of the chart.
            if (pos[col] > 0)
                project.col(col).rowRange(peak - pos[col], peak).setTo(255);
        }
        imshow("vertical projection", project);
    }
}
//获取文本的投影用于分割字符(垂直,水平)
/**
 * Accumulates the projection of zero-valued pixels of `src` into `pos` and
 * displays the resulting profile via draw_projection().
 *
 * V_PROJECT counts zero pixels per column, H_PROJECT counts them per row.
 * Counts are added to whatever `pos` already holds. Any other mode leaves
 * `pos` untouched.
 *
 * @param src  Image read as one uchar per pixel (assumes a single-channel
 *             8-bit image -- TODO confirm at call sites).
 * @param pos  Profile to accumulate into; grown with zeros if it is shorter
 *             than the projected dimension.
 * @param mode V_PROJECT or H_PROJECT.
 * @return Always 0.
 */
int GetTextProjection(Mat &src, vector<int>& pos, int mode)
{
    if (mode != V_PROJECT && mode != H_PROJECT)
        return 0;

    // Make sure every index written below exists.
    const size_t needed = static_cast<size_t>(mode == V_PROJECT ? src.cols : src.rows);
    if (pos.size() < needed)
        pos.resize(needed, 0);

    // Both modes walk the image row by row, which follows the memory layout.
    for (int row = 0; row < src.rows; row++)
    {
        const uchar* pixels = src.ptr<uchar>(row);
        for (int col = 0; col < src.cols; col++)
        {
            if (pixels[col] != 0)
                continue;
            if (mode == V_PROJECT)
                pos[col]++;
            else
                pos[row]++;
        }
    }

    draw_projection(pos, mode);
    return 0;
}
//获取每个分割字符的范围,min_thresh:波峰的最小幅度,min_range:两个波峰的最小间隔
/**
 * Splits a projection profile into runs, one per character or text line.
 *
 * A run opens at the first index whose value is above `min_thresh`. It closes
 * at the first later index whose value is below `min_thresh` and that lies at
 * least `min_range` positions after the opening index; shorter dips do not
 * close the run. A run still open at the end of the profile is closed at the
 * profile length if it is at least `min_range` long.
 *
 * @param vertical_pos Projection profile to scan.
 * @param peek_range   Receives the runs found (appended); `end` is the index
 *                     at which the run was closed.
 * @param min_thresh   Values above this open a run, values below close it.
 * @param min_range    Minimum length a run must have before it may close.
 * @return Always 0.
 */
int GetPeekRange(vector<int> &vertical_pos, vector<char_range_t> &peek_range, int min_thresh = 2, int min_range = 10)
{
    // -1 marks "no run open"; index 0 is a legitimate start position and must
    // not double as the sentinel.
    const int kNoRun = -1;
    const int length = static_cast<int>(vertical_pos.size());

    int begin = kNoRun;
    for (int i = 0; i < length; i++)
    {
        const int value = vertical_pos[i];
        if (begin == kNoRun)
        {
            if (value > min_thresh)
                begin = i;
            continue;
        }
        if (value < min_thresh && i - begin >= min_range)
        {
            char_range_t tmp;
            tmp.begin = begin;
            tmp.end = i;
            peek_range.push_back(tmp);
            begin = kNoRun;
        }
    }

    // A run touching the end of the profile would otherwise be lost.
    if (begin != kNoRun && length - begin >= min_range)
    {
        char_range_t tmp;
        tmp.begin = begin;
        tmp.end = length;
        peek_range.push_back(tmp);
    }
    return 0;
}
inline void save_cut(const Mat& img, int id)
{
char name[128] = { 0 };
sprintf(name, "./save_cut/%d.jpg", id);
imwrite(name, img);
}
//切割字符
/**
 * Cuts one text-line image into single-character images.
 *
 * For every column range a full-height crop is taken, starting two pixels to
 * the left of the range. Each crop is appended to `chars_set` and written to
 * disk through save_cut() using a counter that keeps increasing across calls.
 *
 * @param img          Text-line image (converted with CV_GRAY2BGR for the
 *                     debug overlay, so it is expected to be single-channel).
 * @param v_peek_range Column ranges of the characters in this line.
 * @param h_peek_range Unused; kept for interface compatibility.
 * @param chars_set    Receives the cropped characters (appended).
 * @return Always 0.
 */
int CutChar(Mat &img, const vector<char_range_t>& v_peek_range, const vector<char_range_t>& h_peek_range, vector<Mat>& chars_set)
{
    static int count = 0; // running file index shared by all calls
    (void)h_peek_range;

    // Debug overlay; only displayed when the lines at the end are enabled.
    Mat show_img = img.clone();
    cvtColor(show_img, show_img, CV_GRAY2BGR);

    const Rect imageBounds(0, 0, img.cols, img.rows);
    for (size_t i = 0; i < v_peek_range.size(); i++)
    {
        const int char_gap = v_peek_range[i].end - v_peek_range[i].begin;
        const int x = v_peek_range[i].begin - 2 > 0 ? v_peek_range[i].begin - 2 : 0;
        // The crop width is capped by the line height.
        const int width = char_gap + 4 <= img.rows ? char_gap : img.rows;

        // Clip to the image: a wide range near the right edge would otherwise
        // give a rectangle that reaches past the last column.
        const Rect r = Rect(x, 0, width, img.rows) & imageBounds;
        if (r.width <= 0 || r.height <= 0)
            continue;

        rectangle(show_img, r, Scalar(255, 0, 0), 1); // outline the character
        Mat single_char = img(r).clone();
        chars_set.push_back(single_char);
        save_cut(single_char, count);
        count++;
    }
    // Show the outlined characters:
    // imshow("cut", show_img);
    // waitKey(0);
    return 0;
}
/**
 * Copies the full-width band of rows [begin, end) out of `src`.
 *
 * @param src   Source image.
 * @param begin First row of the band.
 * @param end   Row just past the band.
 * @return A deep copy of the selected rows.
 */
Mat cut_one_line(const Mat& src,int begin,int end)
{
    const int bandHeight = end - begin;
    const Rect band(0, begin, src.cols, bandHeight);
    return src(band).clone();
}
/**
 * Segments a text image into single-character images.
 *
 * The image is binarised in place with Otsu's threshold, split into text
 * lines from its horizontal projection, and every line is split into
 * characters from its vertical projection.
 *
 * @param img Input image; overwritten with its binarised version.
 * @return The character crops of all lines, in the order they were cut.
 */
vector<Mat> CutSingleChar(Mat& img)
{
    // Colour copy of the input, only referenced by the disabled debug output
    // at the end of the function.
    Mat show = img.clone();
    cvtColor(show, show, CV_GRAY2BGR);

    threshold(img, img, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
    // For white text on a black background invert instead
    // (CV_THRESH_BINARY_INV in place of CV_THRESH_BINARY):
    //threshold(img, img, 0, 255, CV_THRESH_BINARY_INV | CV_THRESH_OTSU);

    // Locate the text lines.
    vector<int> rowProfile(img.rows, 0);
    vector<char_range_t> h_ranges;
    GetTextProjection(img, rowProfile, H_PROJECT);
    GetPeekRange(rowProfile, h_ranges, 2, 10);

    // Cut out every text line first.
    vector<Mat> textLines;
    for (size_t k = 0; k < h_ranges.size(); ++k)
    {
        textLines.push_back(cut_one_line(img, h_ranges[k].begin, h_ranges[k].end));
    }

    // Then split each line into characters.
    vector<Mat> chars_set;
    for (size_t k = 0; k < textLines.size(); ++k)
    {
        Mat line = textLines[k];
        imshow("raw line", line);

        vector<int> colProfile(line.cols, 0);
        vector<char_range_t> v_ranges;
        GetTextProjection(line, colProfile, V_PROJECT);
        GetPeekRange(colProfile, v_ranges);
        CutChar(line, v_ranges, h_ranges, chars_set);
    }

    //imshow("line_show", show);
    //imwrite("show.png", show);
    return chars_set;
}
4、缩放成给定大小20*20
// Bring char_test to ocr_width x ocr_height.
// Wider than tall: stretch straight to the target size (aspect ratio is not kept).
if (char_test.cols > char_test.rows)
{
resize(char_test, char_test, Size(ocr_width, ocr_height), 0, 0);
}
else
{
// Otherwise scale so the height equals ocr_height, keeping the aspect ratio...
ratio = 1.0 * ocr_height /char_test.rows;
resize(char_test, char_test, Size((int)(char_test.cols * ratio), ocr_height), 0, 0);
// ...pad left and right with a constant border of value 255 (presumably white) to centre the glyph...
copyMakeBorder(char_test, char_test, 0, 0, (int)(ocr_width - char_test.cols) / 2, (int)(ocr_width - char_test.cols) / 2, BORDER_CONSTANT, 255);
// ...and resize once more, since the integer-halved padding can leave the width one pixel short.
resize(char_test, char_test, Size(ocr_width, ocr_height), 0, 0);
}
5、提取Hog特征:
// Build the HOG feature row for the character under test.
Mat featureMat;
Mat hog_test;
Mat tempPixel;
Mat tempProject;
// Inverted Otsu binarisation: switch to white text on a black background, like the templates.
threshold(char_test, tempPixel, 0, 255, CV_THRESH_BINARY_INV | CV_THRESH_OTSU);
// Enlarge 4x in each dimension before computing the descriptor.
resize(tempPixel, hog_test, Size(ocr_width * 4, ocr_height * 4), 0, 0);
featureMat = hog(hog_test);
6、放进训练好的KNN模型预测:
// Convert the feature row to CV_32F before handing it to the classifier.
featureMat.convertTo(featureMat, CV_32F);
// KnnPredict is defined elsewhere; presumably it returns the predicted label -- confirm against its definition.
float response = KnnPredict(featureMat, model);
7、测试结果如下:
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
2 2 2 2 2 2 2 2 2 0 9 2 1 2 2 2
3 3 3 3 3 3 3 3 3 5 0 3 3 3 3 5
4 4 9 4 5 4 4 4 4 4 4 4 9 4 4 4
5 5 5 0 3 5 5 6 5 1 3 5 5 5 5 5
6 6 6 6 6 5 6 6 6 6 6 6 6 6 6 6
4 7 7 7 7 7 7 7 7 0 1 1 2 1 1 7
5 8 8 0 8 8 8 8 8 1 8 8 7 8 8 8
8 7 4 4 9 9 4 9 4 6 4 4 6 9 5 9
accuracy: test = 71.2%
*** 正常退出 ***