车牌识别大致可分为车牌定位、车牌倾斜矫正、车牌字符切割、车牌字符识别几个步骤。以前做过一个车牌识别的项目,其中字符识别部分是采用模板匹配的方式,该方式效率较低,且准确率不够高,抗干扰能力差。
后将其改用基于HOG特征和ANN的识别方式,有效提高了识别效率、准确率及抗干扰能力。现将该方式分享给大家,自己也作一个备忘。
一、准备工作
收集足够的车牌字符图片(获取途径:网上下载或通过车牌大图进行车牌定位、倾斜矫正、字符切割等算法得到车牌字符图片),人工将其分好类,放置在不同的文件夹下,并将其resize为同样的大小(如16*32)。如下图所示:
为方便打标签,每种字符收集了200张图片,共37种。由于收集难度大(往往只能获取到本省或者邻省的车牌图片),只有湘、鄂两种字符全部是通过摄像头识别真实车牌采集得到的;其他省份的字符一部分由摄像头识别采集,另一部分通过识别网络图片获取。
将文件夹名与汉字对应起来,共37种,如下图所示:
车牌训练集下载地址: https://download.csdn.net/download/fangye945a/12741133
二、车牌字符模型训练与识别预测
话不多说,直接上代码。如下代码基于opencv2.4.9,包含车牌汉字模型训练、预测(识别一张字符图片)、循环预测(识别大量测试集图片,用于测试准确率)功能。
#include <stdlib.h>
#include <sys/time.h>

#include <cmath>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

#include "opencv/cv.h"
#include "opencv/highgui.h"
#include "opencv/ml.h"
#include "opencv/cxcore.h"
#include "contrib.hpp"
// Operating mode of the program, chosen from the first command-line flag.
enum STATES_OPT
{
    TRAIN = 1,     // train the network and save it
    TEST = 2,      // classify one character image
    CYCLETEST = 3  // classify every image below a directory
};
STATES_OPT sta; // mode currently in effect; assigned by arg_type()
using namespace cv;
using namespace std;
//#define GAMMA            // uncomment to compile the gamma-correction step in
#define HOG_SIZE 256       // length of the HOG descriptor of one 16x32 character image
// Capacity of the training buffers (200 images for each of the 37 classes).
// Parenthesised so the macro expands safely inside larger expressions
// such as `x / PIC_NUM` or `x % PIC_NUM`.
#define PIC_NUM (200*37)
#define CLASSNUM 37        // number of character classes
/******************* Global state **********************/
std::vector<float> descriptors;          // HOG descriptor of the most recently processed image
float data[PIC_NUM][HOG_SIZE] = {0};     // training features, one row per sample
float f[1][HOG_SIZE];                    // single feature row used as network input when predicting
float dataCls[PIC_NUM][CLASSNUM] = {0};  // training labels, one row per sample (1.0 marks the class)
int mClass;                              // class index of the training samples being loaded
int dNum;                                // number of training samples stored so far
float fGamma = 1 / 2.2;                  // exponent used by the gamma correction
/********************************************************/
// Name of the sub-directory holding the images of each class: the class index
// written as a decimal string.
char typetable[CLASSNUM][3] = {
    "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",  "8",  "9",
    "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
    "20", "21", "22", "23", "24", "25", "26", "27", "28", "29",
    "30", "31", "32", "33", "34", "35", "36"
};
// UTF-8 encoding of the licence-plate Chinese character that belongs to each
// class index (three bytes plus the terminating NUL).
// The bytes are written as string literals rather than as `{0xE5, ...}` integer
// lists: values above 0x7F do not fit a signed char, so the brace form is a
// narrowing conversion and is rejected by C++11 and later compilers.
char tablename[CLASSNUM][4] = {
    "\xE5\xB7\x9D", //  0: U+5DDD (chuan)
    "\xE9\x84\x82", //  1: U+9102 (e)
    "\xE7\x94\x98", //  2: U+7518 (gan)
    "\xE8\xB5\xA3", //  3: U+8D63 (gan)
    "\xE8\xB4\xB5", //  4: U+8D35 (gui)
    "\xE6\xA1\x82", //  5: U+6842 (gui)
    "\xE9\xBB\x91", //  6: U+9ED1 (hei)
    "\xE6\xB2\xAA", //  7: U+6CAA (hu)
    "\xE5\x90\x89", //  8: U+5409 (ji)
    "\xE5\x86\x80", //  9: U+5180 (ji)
    "\xE6\x99\x8B", // 10: U+664B (jin)
    "\xE6\xB4\xA5", // 11: U+6D25 (jin)
    "\xE4\xBA\xAC", // 12: U+4EAC (jing)
    "\xE8\xBE\xBD", // 13: U+8FBD (liao)
    "\xE9\xB2\x81", // 14: U+9C81 (lu)
    "\xE8\x92\x99", // 15: U+8499 (meng)
    "\xE9\x97\xBD", // 16: U+95FD (min)
    "\xE5\xAE\x81", // 17: U+5B81 (ning)
    "\xE9\x9D\x92", // 18: U+9752 (qing)
    "\xE7\x90\xBC", // 19: U+743C (qiong)
    "\xE9\x99\x95", // 20: U+9655 (shan)
    "\xE8\x8B\x8F", // 21: U+82CF (su)
    "\xE7\x9A\x96", // 22: U+7696 (wan)
    "\xE6\xB9\x98", // 23: U+6E58 (xiang)
    "\xE6\x96\xB0", // 24: U+65B0 (xin)
    "\xE6\xB8\x9D", // 25: U+6E1D (yu)
    "\xE8\xB1\xAB", // 26: U+8C6B (yu)
    "\xE7\xB2\xA4", // 27: U+7CA4 (yue)
    "\xE4\xBA\x91", // 28: U+4E91 (yun)
    "\xE8\x97\x8F", // 29: U+85CF (zang)
    "\xE6\xB5\x99", // 30: U+6D59 (zhe)
    "\xE4\xBD\xBF", // 31: U+4F7F (shi)
    "\xE6\xBE\xB3", // 32: U+6FB3 (ao)
    "\xE6\xB8\xAF", // 33: U+6E2F (gang)
    "\xE8\xAD\xA6", // 34: U+8B66 (jing)
    "\xE9\xA2\x86", // 35: U+9886 (ling)
    "\xE5\xAD\xA6"  // 36: U+5B66 (xue)
};
int init() //初始化各类参数
{
memset(data,0,sizeof(data));
memset(dataCls,0,sizeof(dataCls));
mClass = -1;
dNum = 0;
}
// Returns the time reported by gettimeofday(), expressed in microseconds.
long getCurrentTime()
{
    const long micros_per_second = 1000000;
    struct timeval now;
    gettimeofday(&now, NULL);
    return now.tv_sec * micros_per_second + now.tv_usec;
}
/**
 * Applies gamma correction to an 8-bit image in place.
 *
 * Every channel value v is replaced by (v / 255)^fGamma * 255, taken from a
 * 256-entry lookup table. One-channel and three-channel images are processed;
 * an image with any other channel count is returned unchanged.
 *
 * @param src     image to correct; must own data and have depth CV_8U.
 * @param fGamma  exponent of the correction curve.
 * @return reference to src.
 */
Mat& MyGammaCorrection(Mat& src, float fGamma)
{
    CV_Assert(src.data); // reject an empty matrix
    // Only 8-bit unsigned data can index the 256-entry table below. The previous
    // test `depth() != sizeof(uchar)` compared the depth code with 1 (== CV_8S),
    // so it let every other depth through.
    CV_Assert(src.depth() == CV_8U);

    // Lookup table: normalise, apply the power curve, scale back to 0..255.
    unsigned char lut[256];
    for (int i = 0; i < 256; i++)
    {
        lut[i] = static_cast<unsigned char>(pow((float)(i / 255.0), fGamma) * 255.0);
    }

    const int channels = src.channels();
    switch (channels)
    {
    case 1:
        {
            MatIterator_<uchar> it, end;
            for (it = src.begin<uchar>(), end = src.end<uchar>(); it != end; it++)
                *it = lut[(*it)];
            break;
        }
    case 3:
        {
            MatIterator_<Vec3b> it, end;
            for (it = src.begin<Vec3b>(), end = src.end<Vec3b>(); it != end; it++)
            {
                (*it)[0] = lut[((*it)[0])];
                (*it)[1] = lut[((*it)[1])];
                (*it)[2] = lut[((*it)[2])];
            }
            break;
        }
    }
    return src;
}
/**
 * Computes the HOG descriptor of one image and stores it in the global
 * `descriptors` vector, replacing its previous content.
 *
 * Descriptor layout: 16x32 window, 8x8 blocks, 8x8 block stride, 4x4 cells,
 * 8 orientation bins. For a 16x32 image this gives 2*4 blocks * 4 cells *
 * 8 bins = 256 values (HOG_SIZE).
 *
 * @param img image to describe.
 */
void get_pic_hog(Mat &img)
{
    // Automatic object instead of new/delete: nothing leaks if compute() throws.
    HOGDescriptor hog(Size(16, 32), Size(8, 8), Size(8, 8), Size(4, 4), 8);
    descriptors.clear();
    // Window stride 16x32 (one window per 16x32 image), no padding.
    hog.compute(img, descriptors, Size(16, 32), Size(0, 0));
}
void packData(STATES_OPT sta)
{
int p = 0;
if(sta == TRAIN)
{
for(vector<float>::iterator it = descriptors.begin(); it!=descriptors.end(); it++)
{
data[dNum][p++] = *it;
}
dataCls[dNum++][mClass] = 1.0;
}
else if(sta == TEST)
{
for(vector<float>::iterator it = descriptors.begin(); it!=descriptors.end(); it++)
{
f[0][p++] = *it;
}
}
else if(sta == CYCLETEST)
{
for(vector<float>::iterator it = descriptors.begin(); it!=descriptors.end(); it++)
{
f[0][p++] = *it;
}
}
}
/**
 * Maps the mode flag given on the command line to an operating mode and
 * records it in the global `sta`.
 *
 * @param arg the flag: "-t" (train), "-p" (predict one image), "-c" (predict a
 *            directory tree) or "--help".
 * @return TRAIN, TEST or CYCLETEST for a mode flag; 0 for "--help" (after
 *         printing the usage), for an unknown flag and for a NULL pointer.
 *         `sta` is left untouched in the 0 cases.
 */
int arg_type(char *arg)
{
    if (arg == NULL)
        return 0;

    if (!strcmp(arg, "-t"))
    {
        sta = TRAIN;
    }
    else if (!strcmp(arg, "-p"))
    {
        sta = TEST;
    }
    else if (!strcmp(arg, "-c"))
    {
        sta = CYCLETEST;
    }
    else if (!strcmp(arg, "--help"))
    {
        // The usage lists the arguments each mode actually reads from argv.
        printf("--help 帮助信息\n");
        printf("-t dirpath hidden_num 训练\n");
        printf("-p modelpath filepath 预测\n");
        printf("-c modelpath dirpath 循环预测\n");
        return 0;
    }
    else
    {
        // Unknown flag: report "no mode" instead of whatever sta held before.
        return 0;
    }
    return sta;
}
int train_pictrue(char *argv[])
{
cv::Directory dir;
cout<<"start training..."<<endl;
for(int i=0;i<37;i++)
{
string dirpath = argv[2];
string filetype = "*";
bool addPath = false;
char *p = typetable[i];
string dirname = p;
if(dirpath.at(dirpath.length()-1) != '/' )
{
cout<<"LINE:"<<__LINE__<<endl;
dirpath += "/";
}
dirpath += dirname;
cout<<"Dirpath = "<<dirpath<<endl;
vector<string> filenames = dir.GetListFiles(dirpath, filetype, addPath);
mClass++;
cout<<"i="<<i<<" filenames.size = "<< filenames.size()<<" mClass = " <<mClass<<endl;
for(int j=0; j < filenames.size(); j++)
{
string path = dirpath + "/" + filenames[j];
Mat imageMat = imread(path);
if(imageMat.empty())
{
cout << "image load fail!"<<endl;
return -1;
}
#ifdef GAMMA
MyGammaCorrection(imageMat, fGamma);
#endif
get_pic_hog(imageMat); //获取图片的hog特征
if(dNum+1 > PIC_NUM)
{
cout<<"The train picture is more than "<<PIC_NUM<<endl;
return 0;
}
packData(sta);
//cout<<"dNum:"<<dNum<<" mClass:"<<mClass<<endl;
}
}
cout<<endl<<"---------------Get picture finish ..."<<endl;
CvANN_MLP bp; //建立神经网络进行训练
CvANN_MLP_TrainParams params;
params.train_method=CvANN_MLP_TrainParams::BACKPROP; //训练方法为反向传播法
params.bp_moment_scale=0.1;
params.bp_dw_scale=0.1;
Mat layerSizes = ( Mat_<int>(1,3) << HOG_SIZE, atoi(argv[3]), CLASSNUM);//三层神经网络
Mat trainDate(PIC_NUM,HOG_SIZE,CV_32FC1,data);
Mat trainLable(PIC_NUM,CLASSNUM,CV_32FC1,dataCls);
bp.create(layerSizes, CvANN_MLP::SIGMOID_SYM, 1, 1); //激活函数sigmoid
cout<<"Training..."<<endl;
bp.train(trainDate,trainLable, Mat(),Mat(), params); //开始训练
char charat_name[32]={0};
#ifndef GAMMA
sprintf(charat_name,"zh_charat_%d_%s.xml",PIC_NUM/CLASSNUM,argv[3]);
#else
sprintf(charat_name,"zh_charat_%d_%s_gamma.xml",PIC_NUM/CLASSNUM,argv[3]);
#endif
bp.save(charat_name);
cout << "training finish!!" <<endl;
cout << dNum <<endl;
return 0;
}
int predict_a_pictrue(int argc,char *argv[])
{
if(argc != 4)
{
cout << "param error"<<endl;
return -1;
}
long time1,time2;
time1 = getCurrentTime();
CvANN_MLP bp; //建立神经网络进行训练
cout<<"start predict!!"<<endl;
bp.load(argv[2]); //加载神经网络文件
Mat imageMat = imread(argv[3]);
Mat testmat;
if(imageMat.empty())
{
cout << "image load fail!"<<endl;
return -1;
}
resize(imageMat,testmat,Size(16,32));
Mat binaryzation_image; //获取灰度图片
cvtColor(testmat,binaryzation_image,CV_BGR2GRAY); //灰度化
#ifdef GAMMA
MyGammaCorrection(imageMat, fGamma);
#endif
get_pic_hog(binaryzation_image); //获取图片的hog特征
packData(sta);
Mat nearest(1, CLASSNUM, CV_32FC1, Scalar(0));
Mat charFearture( 1, HOG_SIZE, CV_32FC1, f);
bp.predict(charFearture, nearest);
time2 = getCurrentTime();
printf("Predict used time:%ld\n",time2-time1);
Point maxLoc;
cout << "index\t"<<"charat\t" <<"value\t" <<endl;
for(int i=0;i<CLASSNUM;i++)
cout <<i <<"\t"<< tablename[i]<<"\t" << nearest.at<float>(0,i) <<endl;
minMaxLoc(nearest, NULL, NULL, NULL, &maxLoc);
cout<<"The predict Result: "<< "index = "<< maxLoc.x << " Charat = " << tablename[maxLoc.x] <<endl;
return 0;
}
int predict_dir_pictrue(int argc,char *argv[])
{
cv::Directory dir;
if(argc != 4)
{
cout << "need a true path in param 3!"<<endl;
return 0;
}
long time1,time2;
int error_count=0,sum=0;
time1 = getCurrentTime();
CvANN_MLP bp; //建立神经网络进行训练
cout<<"start cycle predict!!"<<endl;
bp.load(argv[2]);
printf("LINE:%d\n",__LINE__);
char *ptr = argv[3];
string dirpath = ptr;
//string dirpath = "../test_picture/";
string filetype = "*";
bool addPath = false;
for(int type = 0;type<CLASSNUM;type++ )
{
char *p = typetable[type];
string dirname = p;
string mydirpath;
if( dirpath.at(dirpath.length()-1) == '/')
mydirpath = dirpath + dirname;
else
mydirpath = dirpath + "/" +dirname;
//cout << "dirpath ="<<mydirpath<<endl;
vector<string> filenames = dir.GetListFiles(mydirpath, filetype, addPath);
for(int i=0;i < filenames.size(); i++)
{
/*
if(dirpath.at(dirpath.length))
{
string path = dirpath + filenames[i];
printf("path:%s",path);
}
else
{
printf("path:%s",path);
}
*/
string path = mydirpath + "/" + filenames[i];
Mat imageMat = imread(path);
Mat testmat;
resize(imageMat,testmat,Size(16,32));
Mat binaryzation_image; //获取灰度图片
cvtColor(testmat,binaryzation_image,CV_BGR2GRAY); //灰度化
get_pic_hog(binaryzation_image); //获取图片的hog特征
packData(sta);
Mat nearest(1, CLASSNUM, CV_32FC1, Scalar(0));
Mat charFearture( 1, HOG_SIZE, CV_32FC1, f);
bp.predict(charFearture, nearest);
Point maxLoc;
minMaxLoc(nearest, NULL, NULL, NULL, &maxLoc);
if(maxLoc.x != type)
{
error_count++;
cout<<"Predict flase! The Path:is "<<path<<" predict="<<tablename[maxLoc.x]<<endl;
cout<<"type="<<type<<endl;
cout << "index\t"<<"charat\t" <<"value\t" <<endl;
for(int k=0;k<CLASSNUM;k++)
{
cout <<k <<"\t"<< tablename[k]<<"\t" << nearest.at<float>(0,k) <<endl;
}
string error_pic_path = " ../error_zh/";
error_pic_path += dirname;
error_pic_path += "/";
error_pic_path += filenames[i];
cout<<error_pic_path<<endl;
char cmd[256]="mv ";
//char cmd[256]="cp ";
strcat(cmd,path.c_str());
strcat(cmd,error_pic_path.c_str());
cout<<endl<<"cmd:"<<cmd<<endl;
system(cmd);
}
sum++;
}
}
time2 = getCurrentTime();
printf("Predict %d picture used time:%ldus\n",sum,time2-time1);
printf("Predict error count:%d\n",error_count);
printf("Predict true rate = %f\n",1-1.0*error_count/sum);
}
int main(int argc,char *argv[])
{
if(argc < 2 || argc > 4)
{
printf("param error!\n");
return 0;
}
init(); //初始化各类参数
int ret = arg_type(argv[1]);
switch(ret)
{
case 1: //训练样本
{
int ret = train_pictrue(argv);
if(ret == -1)
cout<<"Train fail!!!"<<endl;
break;
}
case 2: //预测单张图片
{
int ret = predict_a_pictrue(argc,argv);
if(ret == -1)
cout<<"Predict a pictrue fail!!!"<<endl;
break;
}
case 3: //预测目录下多张图片
{
int ret = predict_dir_pictrue(argc,argv);
if(ret == -1)
cout<<"Predict dir pictrue fail!!!"<<endl;
break;
}
default:break;
}
return 0;
}
经测试:对于采用相同方法采集的字符图片(获取途径单一),训练时适当调整参数(如中间隐藏层节点数),字符识别准确率能达到99.99%以上。该识别方法便于应用在嵌入式设备中,如车牌识别摄像机(硬识别),感兴趣的读者可以参考并自行尝试。