C++实现
- 代码实现
“linear_regression.h”
//单变量线性回归模型
// One element of the training set.
struct elem_var1
{
    double x; // independent variable
    double y; // dependent variable
};
// Single-variable linear regression with hypothesis h(x) = par1 * x + par0.
class var1_lin_reg
{
public:
    // Initialises the model from a training-set array, its element count and a learning rate.
    var1_lin_reg(const elem_var1* p, int size, double rate);

    // Destructor.
    ~var1_lin_reg();

    // Returns the value of the cost function for the current equation parameters.
    double cost_fuction();

    // Updates both equation parameters simultaneously.
    void update();

    // Minimises the cost function, leaving the parameters at the convergence point.
    void find();

    // Copies the current equation parameters into _par1 and _par0.
    void get_par(double &_par1, double &_par0);

    // Predicts the dependent variable for x using the fitted regression equation.
    double est_val(double x);

private:
    // Training set.
    const elem_var1 * tran_set;

    // Number of elements in the training set.
    int setsize;

    // Coefficient of x in h(x) = par1 * x + par0.
    double par1;

    // Constant term in h(x) = par1 * x + par0.
    double par0;

    // Learning rate.
    double learn_rate;
};
“linear_regression.cpp”
//单变量线性回归
// Sets up the model over a training-set array.
//   p    - address of the training-set array (the pointer is stored, the data is not copied)
//   size - number of elements in the training set
//   rate - learning rate
// Both equation parameters start at 0.
var1_lin_reg::var1_lin_reg(const elem_var1* p, int size, double rate)
    : tran_set(p),
      setsize(size),
      par1(0),
      par0(0),
      learn_rate(rate)
{
}
// Clears the training-set pointer and size; the array itself is not freed here.
var1_lin_reg::~var1_lin_reg()
{
    setsize = 0;
    tran_set = nullptr;
}
double var1_lin_reg::cost_fuction()
{//假设函数为h(x)=kx+b
double hx, sum = 0;
for (int i = 0;i < setsize;i++)
{
hx = par1 * tran_set[i].x + par0;
sum += (hx - tran_set[i].y)*(hx - tran_set[i].y);
}
return (sum / 2.0 / setsize);
}
void var1_lin_reg::update()
{//两参数(par0和par1)同时更新(关键是对微分项的处理)
double hx, sum0 = 0, sum1 = 0;
for (int i = 0;i < setsize;i++)
{
hx = par1 * tran_set[i].x + par0;
sum0 += hx - tran_set[i].y;
sum1 += (hx - tran_set[i].y)*tran_set[i].x;
}
sum0 = learn_rate * sum0 / (double)setsize;
sum1 = learn_rate * sum1 / (double)setsize;
par0 -= sum0;
par1 -= sum1;
}
void var1_lin_reg::find()
{//寻找代价函数最小化时对应的单变量线性回归函数的参数(代价函数收敛点)
double cost_pre, cost_last;
cost_pre = cost_fuction();
update(); //更新参数
cost_last = cost_fuction();
while (cost_pre != cost_last)
{//寻找收敛点
cost_pre = cost_last;
update();
cost_last = cost_fuction();
}
//获得假设函数最优拟合时的参数
}
void var1_lin_reg::get_par(double &_par1,double &_par0)
{//获取回归方程参数
_par1 = par1;
_par0 = par0;
}
// Returns the predicted value h(x) = par1 * x + par0.
// find() is run first on every call, so the parameters are fitted before the
// equation is evaluated.
double var1_lin_reg::est_val(double x)
{
    find();
    return par1 * x + par0;
}
“主函数部分”
#include "linear_regression.h"

#include <cstring>
#include <iostream>

using namespace std;
int main()
{
elem_var1 transet[200];
memset(transet, 0, sizeof(transet));
int size;
double x;
double par1, par0;
cout << "请输入训练集容量:";
cin >> size;
cout << "请输入训练集数据:" << endl;
for (int i = 0;i < size;i++)
{
cout << "数据项" << i + 1 << ": ";
cin >> transet[i].x >> transet[i].y;
}
var1_lin_reg obj1(transet, size, 0.001);
cout << "请输入待预测数据:";
cin >> x;
cout << "预测数据为:" << obj1.est_val(x) << endl;
obj1.get_par(par1, par0);
if(par0>0)
cout << "h(x)=" << par1 << "*x+" << par0 << endl;
else
{
if(par0<0)
cout << "h(x)=" << par1 << "*x" << par0 << endl;
else
cout << "h(x)=" << par1 << "*x" << endl;
}
return 0;
}
- 运行结果
以吴恩达老师的ex1data1.txt数据集为例:
用octave数据可视化:
从图中可以看出,线性拟合的效果应该还可以。