/*
印刷体的识别应该是一项很成熟的技术,准确率应该不低;但是直接将像素输入到SVM而不做特征提取,精确率却很低。同样,将没有提取过特征的图像用KNN来分类的效果也不是很好。
通过这个案例得到的经验是:在使用机器学习做分类前最好先提取特征,而且用一个数据集训练的模型不能直接用在另外一个数据集上测试。因为尝试了很多方法,其中的代码有很多冗余。
*/
#include<iostream>
#include<opencv2/opencv.hpp>
#include<io.h>
#include<direct.h>
#include<opencv2/ml.hpp>
//#include<stack>
using namespace std;
using namespace cv;
using namespace ml;
// ---- forward declarations -------------------------------------------------
Point findPicture(Mat &src,Mat &temp);// locate a template inside an image via template matching
int* verticalClip(Mat &gray);// split a binarised digit strip by vertical projection and classify each glyph
void findconnect(Mat &gray);// declared only; definition not visible in this file
void getConnectedDomain(Mat& src, vector<Rect>& boundingbox);// declared only; definition not visible in this file
void produceTrainData();// cut OpenCV's digits.png into per-class sample images
void fun1();// extract 20x20 ROI tiles from digits.png and save them under D://data//<class>//
int createDirectory(std::string path);// recursively create every directory component of a path
int doOCR(Mat &testData);// classify one digit image with KNN
void trainSVM();// train a linear SVM on HOG features and save svm.xml
int testSVM(Mat &);// classify one digit image with the saved hog+svm.xml model
int* cutNum(Mat &gray);// template-matching based digit segmentation + classification
long long matchTemp(Mat &src, Mat &temp);// sum of absolute per-pixel differences between src and temp
double maxMatch(Mat &src, Mat &temp);// best normalised template-matching score of temp inside src
void feature(Mat dealimage);// compute a HOG descriptor and store it as one row of data_mat
double compare(Mat &src, Mat &sample);// fraction of exactly-equal pixels between src and a resized sample
void fun();// full HOG+SVM training pipeline; saves hog+svm.xml
// ---- globals --------------------------------------------------------------
ostringstream oss;// combines strings and numbers when building glob patterns / file paths
int num = -1;// current class label while iterating sample folders (incremented in fun())
Mat dealimage;
Mat src;
Mat yangben_gray;// "yangben" = (training) sample
Mat yangben_thresh;
const int classsum = 10;// the training set has 10 classes (adjustable)
const int imagesSum = 1;// number of images per class (adjustable)
int yangben_data_position = -1;// row index of the most recently written HOG descriptor in data_mat
Mat data_mat = Mat(classsum*imagesSum, 8100, CV_32FC1);// 8100 = HOG descriptor length for a 128x128 window with 16x16 blocks, 8x8 stride/cells, 9 bins
int main()
{
// Read the source photo, crop the strip that contains the reading, and locate
// the two anchor templates that bracket the digits.
Mat pic = imread("D://dataset//012PIC//1202914.jpg");
Mat pic2,pic3;
pic(Rect(650, 0, 500, 50)).copyTo(pic2);
Mat temp = imread("D://dataset//template.jpg");
Mat temp1 = imread("D://dataset//temp2.jpg");
Point matchLocation = findPicture(pic2,temp);// find the best match position of each anchor
Point matchLocation1 = findPicture(pic2, temp1);
rectangle(pic2,matchLocation,Point(matchLocation.x+temp.cols, matchLocation.y + temp.rows),Scalar(0,0,255));// mark the matched locations
rectangle(pic2, matchLocation1, Point(matchLocation1.x + temp1.cols, matchLocation1.y + temp1.rows), Scalar(0, 255, 0));
Mat Num;
// Crop the region between the two anchors -- this is where the digits live.
pic2(Rect(matchLocation.x+temp.cols+1,matchLocation.y,matchLocation1.x-matchLocation.x-temp1.cols,temp1.rows)).copyTo(Num);
Mat gray;
cvtColor(Num,gray,COLOR_BGR2GRAY);
threshold(gray,gray,100,255,THRESH_BINARY);
int *p=verticalClip(gray);// vertical projection: segment the characters and classify them
//int *p = cutNum(gray);
// NOTE(review): this loop has no sentinel -- it terminates only by reading past
// the end of p until a value outside [-2, 9] appears, which is undefined behaviour.
for(int i=0;p[i]>=-2&&p[i]<=9;i++)
{
cout << p[i] << endl;
}
delete p;// NOTE(review): p was allocated with new[] -- this should be delete[]
//fun();
namedWindow("pic");
imshow("pic",pic);
namedWindow("pic2");
imshow("pic2",pic2);
namedWindow("Num");
imshow("Num", gray);
//fun1();
//trainSVM();
//cutNum();
waitKey(0);
return 0;
}
Point findPicture(Mat &src, Mat &temp)
{
	// Locate `temp` inside `src` with template matching and return the position
	// of the best (maximum CV_TM_CCORR) response.
	Mat out1;
	// FIX: the result matrix is (W-w+1) x (H-h+1) and must be CV_32FC1; the old
	// code dropped the "+1" on the width and used src.type() as the result type.
	out1.create(src.rows - temp.rows + 1, src.cols - temp.cols + 1, CV_32FC1);
	matchTemplate(src, temp, out1, CV_TM_CCORR);
	normalize(out1, out1, 0, 1, NORM_MINMAX);
	Point minLocation, maxLocation;
	double minVal, maxVal;
	minMaxLoc(out1, &minVal, &maxVal, &minLocation, &maxLocation, Mat());
	return maxLocation;	// CV_TM_CCORR: larger response = better match
}
int* verticalClip(Mat &gray)
{
	// Segment a binarised digit strip by vertical projection, classify each
	// glyph with the HOG+SVM model, and return a new[]-allocated array with one
	// entry per glyph: 0..9 = recognised digit, -1 = decimal point, -2 = unknown.
	// The caller owns the result and must release it with delete[].
	int *Num = new int[gray.cols];
	memset(Num, 0, sizeof(int) * gray.cols);	// FIX: was 4*cols, assumed sizeof(int)==4
	// Count the black (0) pixels in every column.
	for (int i = 0; i < gray.rows; i++)
	{
		for (int j = 0; j < gray.cols; j++)
		{
			if (gray.ptr<uchar>(i)[j] == 0) {
				Num[j]++;
			}
		}
	}
	// Draw the projection histogram (debug visualisation only).
	Mat vertical(gray.rows, gray.cols, CV_8UC1, Scalar(255));
	for (int i = 0; i < gray.rows; i++)
	{
		for (int j = 0; j < gray.cols; j++)
		{
			if (i >= gray.rows - Num[j]) { vertical.ptr<uchar>(i)[j] = 0; }
		}
	}
	// Extract the cut positions: arr[] holds the column where a glyph ends,
	// arr1[] the column just before it starts.
	// FIX: both arrays have 10 entries -- bound the writes to avoid overflow.
	int arr[10], length = 0;
	int arr1[10];
	for (int i = 0, j = 0; i + 1 < gray.cols; i++)
	{
		if (Num[i] > 0 && Num[i + 1] == 0 && length < 10) { arr[length] = i; length++; }
		if (Num[i] == 0 && Num[i + 1] > 0 && j < 10) { arr1[j] = i; j++; }
	}
	// Cut each glyph out of the strip.
	vector<Mat> result(length);	// FIX: was a fixed size of 8, overflowed for length > 8
	for (int i = 0; i < length; i++)
	{
		gray(Rect(arr1[i], 0, arr[i] - arr1[i], gray.rows)).copyTo(result[i]);
	}
	// Classify each glyph. Index length-3 is assumed to be the decimal point
	// (two digits follow it in this instrument's read-out) -- TODO confirm.
	int *pNumber = new int[length];
	for (int i = length - 1; i >= 0; i--)
	{
		if (i != length - 3)
		{
			result[i] = 255 - result[i];	// invert: the SVM was trained on white-on-black digits
			int response = testSVM(result[i]);
			pNumber[i] = (response >= 0 && response <= 9) ? response : -2;
		}
		else	// decimal point
		{
			pNumber[i] = -1;
		}
	}
	delete[] Num;	// FIX: `delete Num` on a new[] array was undefined behaviour
	if (length > 3)	// FIX: guard the debug display against short strips
	{
		namedWindow("vertical");
		imshow("vertical", result[3]);
	}
	return pNumber;
}
//根据opencv给出的图片,生成训练样本
void produceTrainData()
{
Mat src = imread("D://digitPicture//digits.png"),gray;
char addr[128] = {0};
cvtColor(src,gray,CV_BGR2GRAY);
namedWindow("source");
imshow("source",src);
int wh = 20;
int m = gray.rows / wh;//行偏移
int n = gray.cols / wh;//列偏移
int fileName = 0, fileNum = 0;
for (int i = 0; i < m;i++)
{
int offsetRow = i*wh;
if (i%5==0&&i!=0)
{
fileName++; fileNum=0;
}
for (int j = 0; j < n;j++)
{
int offsetCol = j*wh;
sprintf_s(addr,"D:\\digitPicture\\%d\\%d.jpg",fileName,fileNum++);
Mat tmp;
//gray(Range(offsetRow, offsetRow + wh), Range(offsetCol, offsetCol + wh)).copyTo(tmp);
gray(Rect( offsetCol,offsetRow, wh, wh)).copyTo(tmp);
imwrite(addr,tmp);
}
}
}
void fun1(){// extract the regions of interest (ROI)
	// Cut digits.png (1000x2000) into 5000 tiles of 20x20 and save them as
	// D://data//<class>//<n>.jpg. The directory must exist before imwrite,
	// otherwise saving fails silently (bug the original author diagnosed here).
	char ad[128] = { 0 };
	int filename = 0, filenum = 0;
	Mat img = imread("D://digitPicture//digits.png");
	if (img.empty()) return;	// FIX: a missing input used to crash inside cvtColor
	Mat gray;
	cvtColor(img, gray, CV_BGR2GRAY);
	int b = 20;	// tile size in pixels
	int m = gray.rows / b;
	int n = gray.cols / b;
	namedWindow("gray");
	imshow("gray", gray);
	char ad1[128] = { 0 };
	// FIX: the directory path depends only on `filename`, so building it and
	// calling createDirectory once per tile (inner loop) was pure waste --
	// hoisted to run only when the class changes.
	sprintf_s(ad1, "D://data//%d//", filename);
	createDirectory(ad1);
	for (int i = 0; i < m; i++)
	{
		int offsetRow = i*b;	// row offset in pixels
		if (i % 5 == 0 && i != 0)	// next digit class every five tile-rows
		{
			filename++;
			filenum = 0;
			sprintf_s(ad1, "D://data//%d//", filename);
			createDirectory(ad1);
		}
		for (int j = 0; j < n; j++)
		{
			int offsetCol = j*b;	// column offset in pixels
			sprintf_s(ad, "D://data//%d//%d.jpg", filename, filenum++);
			// cut out the 20x20 tile
			Mat tmp;
			gray(Rect(offsetCol, offsetRow, b, b)).copyTo(tmp);
			imshow("qwe", tmp);
			imwrite(ad, tmp);
		}
	}
	return;
}
// Create every directory component of `path` that does not yet exist
// (components are recognised at each '\\' or '/' separator, so a trailing
// separator is required for the final directory to be created).
// Returns 0 on success, -1 on failure.
int createDirectory(std::string path)
{
	int len = path.length();
	char tmpDirPath[256] = { 0 };
	if (len >= (int)sizeof(tmpDirPath)) return -1;	// FIX: guard against buffer overflow for long paths
	for (int i = 0; i < len; i++)
	{
		tmpDirPath[i] = path[i];
		if (tmpDirPath[i] == '\\' || tmpDirPath[i] == '/')
		{
			if (_access(tmpDirPath, 0) == -1)	// prefix does not exist yet
			{
				int ret = _mkdir(tmpDirPath);
				if (ret == -1) return ret;
			}
		}
	}
	return 0;
}
int doOCR(Mat &testData) // classify one digit image with k-nearest neighbours (KNN)
{
char ad[128] = {0};
Mat traindata, trainlabel;
int k = 3, testnum = 0, truenum = 0;
// read the 4000-image training set (disabled -- kept for reference)
/*for (int i = 0; i < 10; i++)
{
for (int j = 0; j<400; j++)
{
sprintf_s(ad, "D:\\data\\%d\\%d.jpg", i, j);
Mat srcimage = imread(ad);
cvtColor(srcimage,srcimage,CV_BGR2GRAY);//earlier bug: the samples were not converted to grayscale
srcimage = srcimage.reshape(1, 1);
traindata.push_back(srcimage);
trainlabel.push_back(i);
}
}*/
// Train on the 10 template digits (one binarised 20x20 sample per class).
// NOTE(review): with a single sample per class, k=3 forces neighbours from
// other classes into the vote -- k=1 would likely be more appropriate.
for (int i = 0; i < 10; i++)
{
sprintf_s(ad, "D://dataset//temp//%d.jpg", i);
Mat srcimage = imread(ad);
cvtColor(srcimage, srcimage, CV_BGR2GRAY);
threshold(srcimage, srcimage, 0, 255, THRESH_OTSU);
resize(srcimage, srcimage,Size(20,20));
srcimage = srcimage.reshape(1, 1);// flatten each sample to one row
traindata.push_back(srcimage);
trainlabel.push_back(i);
}
traindata.convertTo(traindata, CV_32F);// KNN requires CV_32F samples
// train
auto KNN = cv::ml::KNearest::create();
KNN->setDefaultK(k);
KNN->setIsClassifier(true);
KNN->setAlgorithmType(cv::ml::KNearest::BRUTE_FORCE);
KNN->train(traindata, cv::ml::ROW_SAMPLE, trainlabel);
//CvKNearest knn(traindata, trainlabel, cv::Mat(), false, k);
//cv::Mat nearests(1, k, CV_32F);
// prepare the query sample; NOTE(review): testData is modified in place
// (resized, flattened, converted) -- the caller's Mat is clobbered.
/*namedWindow("vertical");
imshow("vertical", testData);*/
Mat dummy;
resize(testData,testData,Size(20,20));// match the training sample size
testData=testData.reshape(1,1);
//cout << testData.size() << endl;
testData.convertTo(testData,CV_32F);
//cout<<(testData.type()==CV_32F) << endl;
int response = KNN->findNearest(testData, KNN->getDefaultK(), dummy);
// accuracy evaluation on a 1000-image hold-out set (disabled)
/*for (int i = 0; i < 10; i++)
{
for (int j = 400; j<500; j++)
{
testnum++;
sprintf_s(ad, "D:\\data\\%d\\%d.jpg", i, j);
Mat testdata = imread(ad);
testdata = testdata.reshape(1, 1);
testdata.convertTo(testdata, CV_32F);
Mat dummy;
int response = KNN->findNearest(testdata, KNN->getDefaultK(), dummy);
if (response == i)
{
truenum++;
}
}
}
cout << "测试总数" << testnum << endl;
cout << "正确分类数" << truenum << endl;
cout << "准确率:" << (float)truenum / testnum * 100 << "%" << endl;*/
return response;
}
void trainSVM()
{
char ad[128] = { 0 };
Mat traindata, trainlabel;
int k = 5, testnum = 0, truenum = 0;
//读取训练数据 4000张
for (int i = 0; i < 10; i++)
{
for (int j = 0; j<500; j++)
{
sprintf_s(ad, "D:\\data\\%d\\%d.jpg", i, j);
Mat srcimage = imread(ad);
cvtColor(srcimage, srcimage, CV_BGR2GRAY);//出错位置,没有将样本转变为灰度图
threshold(srcimage,srcimage,0,255,THRESH_OTSU);
yangben_data_position += 1;
feature(srcimage);
//srcimage = srcimage.reshape(1, 1);
//traindata.push_back(srcimage);
trainlabel.push_back(i);
}
}
traindata.convertTo(traindata, CV_32F);
Ptr<ml::SVM> svm;
cout << "training SVM..." << endl;
svm = ml::SVM::create();
svm->setType(ml::SVM::C_SVC);
svm->setKernel(ml::SVM::LINEAR);
svm->setDegree(0);
svm->setGamma(1);
svm->setCoef0(0);
svm->setC(1);
svm->setNu(0);
svm->setP(0);
svm->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01));
svm->train(data_mat, cv::ml::ROW_SAMPLE, trainlabel);//更改位置
svm->save("svm.xml");
cout << "SVM Train Finish!!!" << endl;
}
// Classify one digit image with the saved HOG+SVM model.
// Returns the predicted class label, or -1 if the model file cannot be loaded.
int testSVM(Mat &testData)
{
	// FIX: the old code first called SVM::create() and immediately overwrote
	// the result with load() -- the create was dead work.
	Ptr<ml::SVM> svm = ml::StatModel::load<ml::SVM>("hog+svm.xml");
	if (svm.empty()) return -1;	// FIX: predict() on a null model crashed
	Mat trainTempImg = Mat::zeros(Size(128, 128), CV_8UC1);
	resize(testData, trainTempImg, trainTempImg.size());	// HOG was trained on 128x128 inputs
	// FIX: stack object instead of a leaked `new HOGDescriptor`.
	// Window 128x128, block 16x16, block stride 8x8, cell 8x8, 9 bins -> 8100 values.
	HOGDescriptor hog(Size(128, 128), Size(16, 16), Size(8, 8), Size(8, 8), 9);
	vector<float> descriptors;	// the resulting HOG descriptor vector
	hog.compute(trainTempImg, descriptors, Size(1, 1), Size(0, 0));
	// Copy the descriptor into a single-row CV_32FC1 sample matrix.
	Mat SVMtrainMat = Mat(1, (int)descriptors.size(), CV_32FC1);
	for (int i = 0; i < (int)descriptors.size(); i++)
	{
		SVMtrainMat.at<float>(0, i) = descriptors[i];
	}
	return (int)svm->predict(SVMtrainMat);
}
// Segment the binarised digit strip by vertical projection and classify each
// glyph by pixel-wise similarity against the 0-9 template images.
// Returns a new[]-allocated array with one entry per glyph (-1 = decimal
// point); the caller owns it and must release it with delete[].
int* cutNum(Mat &gray)
{
	int *Num = new int[gray.cols];
	memset(Num, 0, sizeof(int) * gray.cols);	// FIX: was 4*cols, assumed sizeof(int)==4
	// Count the black (0) pixels in every column.
	for (int i = 0; i < gray.rows; i++)
	{
		for (int j = 0; j < gray.cols; j++)
		{
			if (gray.ptr<uchar>(i)[j] == 0) {
				Num[j]++;
			}
		}
	}
	// Projection histogram (debug visualisation only).
	Mat vertical(gray.rows, gray.cols, CV_8UC1, Scalar(255));
	for (int i = 0; i < gray.rows; i++)
	{
		for (int j = 0; j < gray.cols; j++)
		{
			if (i >= gray.rows - Num[j]) { vertical.ptr<uchar>(i)[j] = 0; }
		}
	}
	// arr[] = first column after each glyph, arr1[] = last column before it.
	// FIX: both arrays have 10 entries -- bound the writes against overflow.
	int arr[10], length = 0;
	int arr1[10];
	for (int i = 0, j = 0; i + 1 < gray.cols; i++)
	{
		if (Num[i] > 0 && Num[i + 1] == 0 && length < 10) { arr[length] = i + 1; length++; }
		if (Num[i] == 0 && Num[i + 1] > 0 && j < 10) { arr1[j] = i; j++; }
	}
	// Cut each glyph (stored left to right) and match it against each template.
	vector<Mat> result(length);	// FIX: was a fixed size of 8, overflowed for length > 8
	char addr[128];
	double res = 0;
	int index = 0;	// FIX: was uninitialised -- read below when no template scored > 0
	int *pNumber = new int[length];
	for (int i = 0; i < length; i++)
	{
		gray(Rect(arr1[i], 0, arr[i] - arr1[i], gray.rows)).copyTo(result[i]);
		imshow("resulti", result[i]);
		if (i != length - 3)
		{
			for (int j = 0; j <= 9; j++)
			{
				sprintf_s(addr, "D://dataset//temp//%d.jpg", j);
				Mat mat = imread(addr, IMREAD_GRAYSCALE);
				if (mat.empty()) continue;	// FIX: a missing template used to crash threshold()
				threshold(mat, mat, 100, 255, THRESH_BINARY);
				double m = compare(result[i], mat);
				if (m > res)
				{
					res = m;	// keep the template with the highest similarity
					index = j;
				}
			}
			pNumber[i] = index;
			res = 0;
			index = 0;
		}
		else	// index length-3 is assumed to be the decimal point
		{
			pNumber[i] = -1;
		}
	}
	delete[] Num;	// FIX: Num was leaked -- it was never freed
	if (length > 3)	// FIX: guard the debug display against short strips
	{
		namedWindow("result");
		imshow("result", result[3]);
	}
	return pNumber;
}
// Similarity measure between two single-channel images: the sum of absolute
// per-pixel differences (0 = identical). `src` is resized in place to match
// `temp` when their sizes differ.
long long matchTemp(Mat &src, Mat &temp)
{
	if (src.size() != temp.size())
	{
		resize(src, src, temp.size());
	}
	absdiff(temp, src, src);	// equivalent to src = abs(temp - src)
	long long total = 0;
	for (int r = 0; r < temp.rows; ++r)
	{
		const uchar *row = src.ptr<uchar>(r);
		for (int c = 0; c < temp.cols; ++c)
		{
			total += row[c];
		}
	}
	return total;
}
// Pad `src` up to at least 25x25 (with white) and return the best normalised
// CV_TM_CCORR template-matching score of `temp` inside it, or -1 when the two
// images have different types. `src` is modified in place by the padding.
double maxMatch(Mat &src, Mat &temp)
{
	// FIX: copyMakeBorder rejects negative border sizes -- the old code passed
	// 25-src.rows / 25-src.cols unconditionally, which is invalid whenever the
	// input is already 25 pixels or larger in either dimension.
	int padTop = (src.rows < 25) ? 25 - src.rows : 0;
	int padLeft = (src.cols < 25) ? 25 - src.cols : 0;
	copyMakeBorder(src, src, padTop, 0, padLeft, 0, BORDER_CONSTANT, 255);	// pad with white
	if (src.type() != temp.type())
	{
		return -1;
	}
	imshow("src", src);
	imshow("temp", temp);
	Mat out1;
	out1.create(src.rows - temp.rows + 1, src.cols - temp.cols + 1, src.type());
	matchTemplate(src, temp, out1, CV_TM_CCORR);
	normalize(out1, out1, 0, 1, NORM_MINMAX);
	Point minLocation, maxLocation;
	double min, max;
	minMaxLoc(out1, &min, &max, &minLocation, &maxLocation, Mat());
	return max;
}
// Extract the HOG feature of one sample and store it as row
// `yangben_data_position` of the global data_mat.
void feature(Mat dealimage)
{
	// FIX: guard the global row index -- trainSVM/fun drive it past
	// data_mat.rows when more samples than classsum*imagesSum are processed,
	// which was an out-of-bounds write.
	if (yangben_data_position < 0 || yangben_data_position >= data_mat.rows) return;
	// Upscale the sample to 128x128 so HOG can extract features from it.
	Mat trainImg = Mat(Size(128, 128), CV_8UC1);
	resize(dealimage, trainImg, trainImg.size());
	// Window 128x128, block 16x16, block stride 8x8, cell 8x8, 9 bins
	// -> 15*15 blocks * 4 cells * 9 bins = 8100 values (= data_mat.cols).
	// FIX: stack object instead of a leaked `new HOGDescriptor`.
	HOGDescriptor hog(Size(128, 128), Size(16, 16), Size(8, 8), Size(8, 8), 9);
	vector<float> descriptors;	// the HOG descriptor vector
	hog.compute(trainImg, descriptors, Size(1, 1), Size(0, 0));	// window stride (1,1)
	// Copy the descriptor into its row of data_mat, bounded by the row width.
	for (int j = 0; j < (int)descriptors.size() && j < data_mat.cols; j++)
	{
		data_mat.at<float>(yangben_data_position, j) = descriptors[j];
	}
}
// Resize `sample` to src's size and return the fraction of bytes that are
// exactly equal (1.0 = identical). Assumes src and sample have the same
// number of channels -- TODO confirm with callers (cutNum passes grayscale).
double compare(Mat &src, Mat &sample)
{
	double same = 0.0, difPoint = 0.0;
	Mat now;
	resize(sample, now, src.size());
	int row = now.rows;
	int col = now.cols * now.channels();
	// FIX: the old code looped `i < 1` but read row*col bytes through the
	// row-0 pointer -- valid only for continuous matrices, undefined behaviour
	// otherwise. Walk each row through its own pointer instead.
	for (int i = 0; i < row; i++) {
		const uchar * data1 = src.ptr<uchar>(i);
		const uchar * data2 = now.ptr<uchar>(i);
		for (int j = 0; j < col; j++) {
			if (data1[j] == data2[j]) same++;
			else difPoint++;
		}
	}
	// FIX: avoid 0/0 for empty images.
	return (same + difPoint) > 0 ? same / (same + difPoint) : 0.0;
}
void fun()
{
// Full HOG+SVM training pipeline: read the samples from D://dataset//temp//,
// extract HOG features into the global data_mat, train a linear SVM and save
// it as hog+svm.xml (the model testSVM() loads).
Mat trainingData;
// training sample labels
Mat labels;
// final training sample labels (NOTE(review): unused)
Mat clas;
// final training data (NOTE(review): unused -- features go into global data_mat)
Mat traindata;
// extract the images from the given folders //
for (int p = 0; p < classsum; p++)// iterate the 0..9 class folders in turn
{
oss << "D://dataset//temp//";
num += 1;// num runs from 0 to 9 and doubles as the class label
int label = num;
oss << num << "*.jpg";// file-name pattern; oss combines the number with the string
string pattern = oss.str();// oss.str() yields the accumulated string for glob
oss.str("");// clear oss after every iteration
vector<Mat> input_images;
vector<String> input_images_name;
cv::glob(pattern, input_images_name, false);
// `false`: only match files directly in the folder; `true` would also recurse
// into subfolders. input_images_name now holds the matching image paths.
int all_num = input_images_name.size();
// total number of images in the folder
//cout << num << ":总共有" << all_num << "个图片待测试" << endl;
for (int i = 0; i < imagesSum; i++)// iterate the images of this class
{
cvtColor(imread(input_images_name[i]), yangben_gray, COLOR_BGR2GRAY);// to grayscale
threshold(yangben_gray, yangben_thresh, 0, 255, THRESH_OTSU);// binarise (Otsu)
// read each image and append it to input_images
input_images.push_back(yangben_thresh);
dealimage = input_images[i];
// HOG was chosen as the feature-extraction method
yangben_data_position += 1;// index of the current image = row in data_mat
feature(dealimage);// extract this image's HOG feature into data_mat
labels.push_back(label);// store each image's label in the same order
cout << "第" << yangben_data_position << "样本正在提取HOG特征" << endl;
}
}
cout << "样本特征提取完毕,等待创建SVM模型" << endl;
Ptr<SVM> SVM_params = SVM::create();
SVM_params->setType(SVM::C_SVC);// C_SVC for classification, C_SVR for regression
SVM_params->setKernel(SVM::LINEAR); // LINEAR kernel (note: RBF is the Gaussian kernel, not SIGMOID)
SVM_params->setDegree(0);// kernel parameter `degree`, for the polynomial kernel
SVM_params->setGamma(1);// kernel parameter `gamma`, for polynomial/RBF/SIGMOID kernels
SVM_params->setCoef0(0);// kernel parameter `coef0`, for polynomial/SIGMOID kernels
SVM_params->setC(1);// optimisation parameter C, for C-SVC, EPS_SVR and NU_SVR
SVM_params->setNu(0);// optimisation parameter nu, for NU_SVC, ONE_CLASS and NU_SVR
SVM_params->setP(0);// optimisation parameter p of the EPS_SVR loss function
// stop after 1000 iterations or once the error drops below 0.01
SVM_params->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01));
// combine the training data with the labels
Ptr<TrainData> tData = TrainData::create(data_mat, ROW_SAMPLE, labels);
// train the classifier
SVM_params->train(tData);// train
// save the model
SVM_params->save("hog+svm.xml");
cout << "训练好了!!!" << endl;
}