前言
本文的实现主要参照了Andrew Ng的机器学习课程所讲的内容,相关理论知识已在上一篇博文中介绍。由于刚接触C++,代码写得比较粗糙,望见谅。
实验环境
Visual Studio 2013
OpenCV 2.4
数据
本次实验数据包含了手写数字0和1,每类样本分别有20个。如下图所示:
实验代码
1.定义一个LogisticRegression的类:
头文件 LogisticRegression.h
#ifndef _LOGISTIC_REGRESSION_H_
#define _LOGISTIC_REGRESSION_H_
#include <vector>
#include <string>
#include <iostream>
#include <strstream>
#include <fstream>
#include <opencv2\core\core.hpp>
using namespace cv;
using namespace std;
// Binary logistic-regression model.
// Holds the samples and labels read by loadData(), a copy of the samples
// with a leading column of ones, and the parameter column vector theta.
class LogisticRegression
{
public:
// Load the data set: reads the "datamat" and "labelsmat" nodes of a
// FileStorage file and (re)initialises the extended matrix and theta.
void loadData(string filePath);//xml
// Accessors: each returns a reference to the corresponding internal matrix.
Mat& getX();
Mat& getEX();
Mat& getTheta();
Mat& getY();
// Feature normalisation: in place, per column, skipping column 0.
// Matrices with two or fewer columns are left untouched.
void featureNormalize(Mat& x);
// Prediction: returns 1 when the hypothesis for the first row of x,
// evaluated with the stored theta, exceeds 0.5, otherwise 0.
float predict(Mat& x);
// Hypothesis function Hx = sigmoid(x * theta). Declared static so that
// other classes can call it without an instance.
static void calculateHx(Mat& x, Mat& theta, Mat& Hx);
static void sigmoid(Mat& z, Mat& g);// activation function: g = 1 / (1 + exp(-z))
// Classifier accuracy: percentage of rows whose thresholded hypothesis equals y.
float classiferAccuracy(Mat& x,Mat& y,Mat& theta);
private:
Mat originalX;// samples as loaded, converted to CV_32F
Mat extendX;// originalX with a column of ones prepended
Mat y;// labels, converted to CV_32F
int dim;// data dimensionality (not referenced by the visible implementation)
Mat theta;// column vector of parameters
};
#endif
实现文件 LogisticRegression.cpp
#include "LogisticRegression.h"
// Loads the training set from an OpenCV FileStorage file (the article stores it as XML).
// The file must provide two nodes:
//   "datamat"   - the samples, one per row
//   "labelsmat" - the labels, one row per sample
// (the article's data set holds the handwritten digits 0 and 1, 20 samples per class).
//
// On success the members are populated as follows:
//   originalX - samples converted to CV_32F
//   y         - labels converted to CV_32F
//   extendX   - originalX with a leading column of ones (bias term)
//   theta     - zero column vector with one entry per column of extendX
// On failure (file cannot be opened, samples missing, or the label row count
// differs from the sample row count) an error is written to cerr and the
// members are left exactly as they were before the call.
void LogisticRegression::loadData(String filename)
{
    cout << "loading the dataset...";
    FileStorage f;
    // Author's note: opening works in Release builds but failed in Debug builds.
    if (!f.open(filename, FileStorage::READ))
    {
        cerr << "file can not be opened: " << filename << endl;
        return;
    }

    // Read into locals first so that a bad file cannot leave the model half-loaded.
    Mat data;
    Mat labels;
    f["datamat"] >> data;
    f["labelsmat"] >> labels;
    f.release();

    if (data.empty() || labels.rows != data.rows)
    {
        cerr << "invalid dataset in file: " << filename << endl;
        return;
    }

    data.convertTo(originalX, CV_32F);
    labels.convertTo(y, CV_32F);
    cout << "read " << originalX.rows << " rows of data" << endl;

    // extendX = [1 | originalX]: column 0 keeps the ones it was created with,
    // the remaining columns receive the samples.
    extendX = Mat::ones(originalX.rows, originalX.cols + 1, CV_32FC1);
    Mat features = extendX.colRange(1, extendX.cols); // header sharing extendX's data
    originalX.copyTo(features);

    theta = Mat::zeros(originalX.cols + 1, 1, originalX.type());
}
// Accessors.
// Returns a reference to the stored sample matrix (without the bias column).
Mat& LogisticRegression::getX()
{
    return this->originalX;
}
// Returns a reference to the stored label matrix.
Mat& LogisticRegression::getY()
{
    return this->y;
}
// Returns a reference to the extended sample matrix (leading column of ones).
Mat& LogisticRegression::getEX()
{
    return this->extendX;
}
// Returns a reference to the parameter column vector.
Mat& LogisticRegression::getTheta()
{
    return this->theta;
}
// Feature normalisation (in place).
// ex: sample matrix whose column 0 is skipped (the loop starts at column 1);
//     every other column has its mean subtracted and is then divided by its
//     standard deviation.
// A matrix with two or fewer columns is returned untouched (a single feature
// needs no normalisation).
// A column whose standard deviation is (numerically) zero is only centred:
// dividing it by zero carries no information and its result depends on the
// OpenCV version in use.
void LogisticRegression::featureNormalize(Mat& ex)
{
    if (ex.cols <= 2) return;

    // Threshold below which a column is treated as constant.
    const double minStdDev = 1e-12;

    for (int col = 1; col < ex.cols; col++)
    {
        Mat mean;
        Mat stddev;
        meanStdDev(ex.col(col), mean, stddev);

        // Centre the column.
        ex.col(col) = ex.col(col) - mean;

        // Scale to unit variance; meanStdDev reports its results as doubles.
        if (stddev.at<double>(0, 0) > minStdDev)
        {
            ex.col(col) = ex.col(col) / stddev;
        }
    }
}
// Prediction.
// ex: one sample (a row laid out like the rows of the extended matrix).
// Evaluates the hypothesis with the trained parameters stored in this object
// and returns 1 when its first element exceeds 0.5, otherwise 0.
float LogisticRegression::predict(Mat& ex)
{
    Mat probability;
    calculateHx(ex, theta, probability);
    if (probability.at<float>(0, 0) > 0.5)
    {
        return 1;
    }
    return 0;
}
// Hypothesis function: Hx = sigmoid(x * theta).
// x:     sample matrix, one sample per row
// theta: parameter column vector
// Hx:    output, receives the hypothesis values
void LogisticRegression::calculateHx(Mat& x, Mat& theta, Mat& Hx)
{
    Mat linearTerm(x * theta);
    sigmoid(linearTerm, Hx);
}
// Activation function: g = 1 / (1 + exp(-z)), evaluated element by element.
// z: input matrix
// g: output matrix
void LogisticRegression::sigmoid(Mat& z, Mat& g)
{
    Mat negExp;
    cv::exp(-z, negExp);
    g = 1.0 / (1 + negExp);
}
// Classifier accuracy.
// x:     sample matrix, one sample per row
// y:     expected labels, one row per sample
// theta: parameter column vector
// Returns the percentage (0..100) of rows whose thresholded hypothesis equals
// the corresponding label.
float LogisticRegression::classiferAccuracy(Mat& x, Mat& y, Mat& theta)
{
    Mat probabilities;
    calculateHx(x, theta, probabilities);

    // The comparison mask is divided by 255 to obtain 0/1 labels, which are
    // then converted to the label type so they can be compared with y.
    Mat predicted = (probabilities > 0.5) / 255;
    predicted.convertTo(predicted, y.type());

    const int matches = countNonZero(y == predicted);
    return 100 * static_cast<float>(matches) / predicted.rows;
}
2.定义一个梯度下降的类GradientDescent:
头文件 GradientDescent.h
#ifndef _GRADIENTDESCENT_H
#define _GRADIENTDESCENT_H
#include <opencv2\core\core.hpp>
using namespace cv;
// Training helpers for the logistic-regression model.
class GradientDescent
{
public:
// Batch gradient descent: runs num_iters iterations with learning rate alpha
// over the samples x and labels y, updating theta in place.
void gradientDescent(Mat& x, Mat& y, Mat &theta, int num_iters, float alpha);
// Cost: returns -(y' * log(h) + (1 - y') * log(1 - h)) / x.rows,
// where h is the hypothesis for x and theta.
float computeCost(Mat& x, Mat& y, Mat &theta);
};
#endif
实现文件 GradientDescent.cpp
#include "GradientDescent.h"
#include "LogisticRegression.h"
#include <iostream>
using namespace std;
// Batch gradient descent for logistic regression.
// x:         sample matrix, one sample per row
// y:         label column vector, one row per sample
// theta:     parameter column vector, updated in place
// num_iters: number of iterations to run
// alpha:     learning rate
//
// Every iteration applies the simultaneous update
//     theta := theta - (alpha / m) * x' * (h(x) - y),   m = x.rows
// The hypothesis is evaluated once per iteration with the parameters from the
// previous iteration, so no partial derivative sees a parameter that was
// already changed in the same iteration. (The former per-parameter loop
// updated theta component by component and re-evaluated the hypothesis for
// every parameter/sample pair.)
// Does nothing when x has no rows, which would otherwise divide by zero.
void GradientDescent::gradientDescent(Mat& x, Mat& y, Mat &theta, int num_iters, float alpha)
{
    const int dataSize = x.rows; // number of samples
    if (dataSize == 0) return;

    const float stepScale = alpha / dataSize;
    const Mat xT = x.t(); // loop-invariant transpose

    Mat hx;
    for (int i = 0; i < num_iters; i++)
    {
        LogisticRegression::calculateHx(x, theta, hx);
        Mat gradient = xT * (hx - y);
        Mat step = gradient * stepScale;
        theta -= step;
    }
}
// Cost of the parameters theta on the samples x with labels y:
//     J = -(y' * log(h) + (1 - y') * log(1 - h)) / x.rows
// where h is the hypothesis computed by LogisticRegression::calculateHx.
float GradientDescent::computeCost(Mat& x, Mat& y, Mat &theta)
{
    Mat hypothesis;
    LogisticRegression::calculateHx(x, theta, hypothesis);

    Mat logPositive; // log(h)
    Mat logNegative; // log(1 - h)
    cv::log(hypothesis, logPositive);
    cv::log(1 - hypothesis, logNegative);

    Mat total = -(y.t()*logPositive + (1 - y.t())*logNegative);
    return total.at<float>(0, 0) / x.rows;
}
3.代码测试:
样本集来自MNIST数据库,可在官网下载:http://yann.lecun.com/exdb/mnist/
为了避免每次都从图片中去读数据,可用下面的语句把已经读好的样本数据和类标号保存到xml文件中
// Save the samples and labels that were already read, so later runs can load
// them from the XML file instead of parsing the images again.
FileStorage f(".\\ex_data.xml", FileStorage::WRITE);
f << "datamat" << originalX;  // originalX: Mat holding the raw sample data
f << "labelsmat" << y;        // y: Mat holding the class labels
f.release();
main.cpp内容如下:
#include "LogisticRegression.h"
#include "GradientDescent.h"
#include <iostream>
#include <opencv2\opencv.hpp>
using namespace cv;
using namespace std;
int main()
{
LogisticRegression regression;
const String filename = "data/data01.xml";
regression.loadData(filename);
GradientDescent gd;
float cost=gd.computeCost(regression.getEX(), regression.getY(), regression.getTheta());
cout << "cost:" << cost << endl;
gd.gradientDescent(regression.getEX(), regression.getY(), regression.getTheta(), 400, 0.01);
cout<<"第1个样本的分类结果:"<<regression.predict(regression.getEX().row(0))<<endl;
cout << "最后一个样本的分类结果:" << regression.predict(regression.getEX().row(39))<<endl;
float accuracy=regression.classiferAccuracy(regression.getEX(), regression.getY(), regression.getTheta());
cout << "分类器对训练集的正确率为:" << accuracy <<"%"<< endl;
getchar();
}
输出结果:
由于样本量少,同时0和1比较容易区分,因此在训练集上达到了100%的正确率