SVM数字识别


#include "stdafx.h"
#include <fstream>
#include "opencv2/opencv.hpp"
#include <vector>
using namespace std;
using namespace cv;
#define SHOW_PROCESS 1
#define ON_STUDY 1
class NumTrainData
{
public:
    NumTrainData()
    {
        memset(data, 0, sizeof(data));
        result = -1;
    }
public:
    float data[64];
    int result;
};
vector<NumTrainData> buffer;
int featureLen = 64;
void swapBuffer(char* buf)
{
    char temp;
    temp = *(buf);
    *buf = *(buf+3);
    *(buf+3) = temp;
    temp = *(buf+1);
    *(buf+1) = *(buf+2);
    *(buf+2) = temp;
}
void GetROI(Mat& src, Mat& dst)
{
    int left, right, top, bottom;
    left = src.cols;
    right = 0;
    top = src.rows;
    bottom = 0;
    //Get valid area
    for(int i=0; i<src.rows; i++)
    {
        for(int j=0; j<src.cols; j++)
        {
            if(src.at<uchar>(i, j) > 0)
            {
                if(j<left) left = j;
                if(j>right) right = j;
                if(i<top) top = i;
                if(i>bottom) bottom = i;
            }
        }
    }
    //Point center;
    //center.x = (left + right) / 2;
    //center.y = (top + bottom) / 2;
    int width = right - left;
    int height = bottom - top;
    int len = (width < height) ? height : width;
    //Create a squre
    dst = Mat::zeros(len, len, CV_8UC1);
    //Copy valid data to squre center
    Rect dstRect((len - width)/2, (len - height)/2, width, height);
    Rect srcRect(left, top, width, height);
    Mat dstROI = dst(dstRect);
    Mat srcROI = srcRect);
     srcROI.copyTo(dstROI);
}
int ReadTrainData()
{
    Mat src;
    Mat temp = Mat::zeros(8, 8, CV_8UC1);
    Mat m = Mat::zeros(1, 64, CV_8UC1);
    Mat dst;
    NumTrainData rtd;
    const int p_num = 377;
    //图片对应的数字,存储在nclass.xml中
    Mat label=cvCreateMat(1, p_num, CV_32SC1);
    FileStorage file("nclass.xml", FileStorage::READ);
    file["data"]>>label;
    file.release();
    cout<<label.size()<<endl;
    int k=0;
    char ch[10];
    int x=0;
    int ff;
    int i, j;
    int re;
    //读入图片,将其转化为行矩阵,存入NumTrainData中
    while (k < p_num)
    {
        cout<<k<<endl;
        string str;
        sprintf(ch,"%d",k);
        str=ch;
        str=str+".jpg";
        //Read data
        imread(str.c_str(),  0);
        GetROI(src, dst);
        resize(dst, temp, temp.size());
        for(i = 0; i<8; i++)
        {
            for(j = 0; j<8; j++)
            {
                ff = temp.at<uchar>(i, j);
                rtd.data[ i*8 + j] = ff;
            }
        }
        re=label.at<int>(0,k);
        rtd.result=re;
        buffer.push_back(rtd);
        k++;
    }
    return 0;
}
void newRtStudy(vector<NumTrainData>& trainData)
{
    int testCount = trainData.size();
    Mat data = Mat::zeros(testCount, featureLen, CV_32FC1);
    Mat res = Mat::zeros(testCount, 1, CV_32SC1);
    for (int i= 0; i< testCount; i++)
    {
        NumTrainData td = trainData.at(i);
        memcpy(data.data + i*featureLen*sizeof(float), td.data, featureLen*sizeof(float));
        res.at<unsigned int>(i, 0) = td.result;
    }
    /START RT TRAINNING//
    CvRTrees forest;
    CvMat* var_importance = 0;
    forest.train( data, CV_ROW_SAMPLE, res, Mat(), Mat(), Mat(), Mat(),
        CvRTParams(10,10,0,false,15,0,true,4,100,0.01f,CV_TERMCRIT_ITER));
    forest.save( "new_rtrees.xml" );
}
int newRtPredict()
{
    CvRTrees forest;
    forest.load( "new_rtrees.xml" );
    const char fileName[] = "../res/t10k-p_w_picpaths.idx3-ubyte";
    const char labelFileName[] = "../res/t10k-labels.idx1-ubyte";
    ifstream lab_ifs(labelFileName, ios_base::binary);
    ifstream ifs(fileName, ios_base::binary);
    if( ifs.fail() == true )
        return -1;
    if( lab_ifs.fail() == true )
        return -1;
    char magicNum[4], ccount[4], crows[4], ccols[4];
    ifs.read(magicNum, sizeof(magicNum));
    ifs.read(ccount, sizeof(ccount));
    ifs.read(crows, sizeof(crows));
    ifs.read(ccols, sizeof(ccols));
    int count, rows, cols;
    swapBuffer(ccount);
    swapBuffer(crows);
    swapBuffer(ccols);
    memcpy(&count, ccount, sizeof(count));
    memcpy(&rows, crows, sizeof(rows));
    memcpy(&cols, ccols, sizeof(cols));
    Mat src = Mat::zeros(rows, cols, CV_8UC1);
    Mat temp = Mat::zeros(8, 8, CV_8UC1);
    Mat m = Mat::zeros(1, featureLen, CV_32FC1);
    Mat img, dst;
    //Just skip label header
    lab_ifs.read(magicNum, sizeof(magicNum));
    lab_ifs.read(ccount, sizeof(ccount));
    char label = 0;
    Scalar templateColor(255, 0, 0);
    NumTrainData rtd;
    int right = 0, error = 0, total = 0;
    int right_1 = 0, error_1 = 0, right_2 = 0, error_2 = 0;
    while(ifs.good())
    {
        //Read label
        lab_ifs.read(&label, 1);
        label = label + '0';
        //Read data
        ifs.read((char*)src.data, rows * cols);
        GetROI(src, dst);
        //Too small to watch
        img = Mat::zeros(dst.rows*30, dst.cols*30, CV_8UC3);
        resize(dst, img, img.size());
        rtd.result = label;
        resize(dst, temp, temp.size());
        //threshold(temp, temp, 10, 1, CV_THRESH_BINARY);
        for(int i = 0; i<8; i++)
        {
            for(int j = 0; j<8; j++)
            {
                m.at<float>(0,j + i*8) = temp.at<uchar>(i, j);
            }
        }
        if(total >= count)
            break;
        char ret = (char)forest.predict(m);
        if(ret == label)
        {
            right++;
            if(total <= 5000)
                right_1++;
            else
                right_2++;
        }
        else
        {
            error++;
            if(total <= 5000)
                error_1++;
            else
                error_2++;
        }
        total++;
#if(SHOW_PROCESS)
        stringstream ss;
        ss << "Number " << label << ", predict " << ret;
        string text = ss.str();
        putText(img, text, Point(10, 50), FONT_HERSHEY_SIMPLEX, 1.0, templateColor);
        imshow("img", img);
        if(waitKey(0)==27) //ESC to quit
            break;
#endif
    }
    ifs.close();
    lab_ifs.close();
    stringstream ss;
    ss << "Total " << total << ", right " << right <<", error " << error;
    string text = ss.str();
    putText(img, text, Point(50, 50), FONT_HERSHEY_SIMPLEX, 1.0, templateColor);
    imshow("img", img);
    //waitKey(0);
    return 0;
}
void newSvmStudy(vector<NumTrainData>& trainData)
{
    int testCount = trainData.size();
    Mat m = Mat::zeros(1, featureLen, CV_32FC1);
    Mat data = Mat::zeros(testCount, featureLen, CV_32FC1);
    Mat res = Mat::zeros(testCount, 1, CV_32SC1);
    for (int i= 0; i< testCount; i++)
    {
        NumTrainData td = trainData.at(i);
        memcpy(m.data, td.data, featureLen*sizeof(float));
        normalize(m, m);
        memcpy(data.data + i*featureLen*sizeof(float), m.data, featureLen*sizeof(float));
        res.at<unsigned int>(i, 0) = td.result;
    }
    /START SVM TRAINNING//
    CvSVM svm = CvSVM();
    CvSVMParams param;
    CvTermCriteria criteria;
    criteria= cvTermCriteria(CV_TERMCRIT_EPS, 1000, FLT_EPSILON);
    param= CvSVMParams(CvSVM::C_SVC, CvSVM::RBF, 10.0, 8.0, 1.0, 10.0, 0.5, 0.1, NULL, criteria);
    svm.train(data, res, Mat(), Mat(), param);
    svm.save( "SVM_DATA.xml" );
}
int newSvmPredict()
{
    CvSVM svm = CvSVM();
    svm.load( "SVM_DATA.xml" );
    Mat temp = Mat::zeros(8, 8, CV_8UC1);
    Mat m = Mat::zeros(1, featureLen, CV_32FC1);
    Mat dst;
    int k=0;
    const int p_num=17;
    char ch[10];
    while (k <= p_num)
    {
        string str;
        sprintf(ch,"%d",k+313);
        str=ch;
        str=str+".jpg";
        //Read data
        Mat  imread(str.c_str(),  0);
        GetROI(src, dst);
        resize(dst, temp, temp.size());
        //threshold(temp, temp, 10, 1, CV_THRESH_BINARY);
        for(int i = 0; i<8; i++)
        {
            for(int j = 0; j<8; j++)
            {
                m.at<float>(0,j + i*8) = temp.at<uchar>(i, j);
            }
        }
        normalize(m, m);
        int ret = (int)svm.predict(m);
        cout<<ch<<","<<ret<<endl;
        k++;
    }
    return 0;
}
int main( int argc, char *argv[] )
{
    //ON_STUDY,训练开关,为1时执行训练
#if(ON_STUDY)
    //读入训练数据,数据最终存入buffer中
    ReadTrainData();
    //训练,训练后的svm保存在SVM_DATA.xml中
    //newRtStudy(buffer);
    newSvmStudy(buffer);
#else
    //分析判断数字
    //newRtPredict();
    newSvmPredict();
#endif
    return 0;
}



输入的是标号为0到p_num的图片和对应的数字,nclass.xml是opencv对mat类型自动存储的格式,然后手动修改内容,想找个更好的,目前还没有找到