#include <stdio.h>
#define M 10
//todo: handle the case of multiple features
//todo: consider nonlinear regression problems
//is convergence
//todo: use "J changes very little between two consecutive theta updates" as the convergence criterion
/* Reports whether gradient descent has converged.
 * Computes the squared-error cost J(theta) = 0.5 * sum_i (h(x_i) - y_i)^2
 * over all M samples, where h(x) = theta[0] + theta[1]*x.
 * Returns 1 when J is effectively zero (below 1e-15), 0 otherwise. */
int is_convergence(int sample[M][2],double *theta)
{
double cost = 0.0;
for (int k = 0; k < M; k++)
{
double predicted = theta[0] + theta[1] * sample[k][0];
double residual = predicted - sample[k][1];
cost += residual * residual;
}
cost *= 0.5;
return (cost < 1e-15) ? 1 : 0;
}
//批量梯度下降
//批量梯度下降 (batch gradient descent)
/* Fits the linear model h(x) = theta[0] + theta[1]*x by batch gradient
 * descent: each iteration accumulates the gradient over all M samples,
 * then updates theta in place until is_convergence() says the fit is exact.
 * sample: M (x, y) pairs; alpha: learning rate (diverges if too large);
 * theta: in/out — initial guess on entry, fitted parameters on return.
 * Prints the result and the iteration count to stderr. */
void batch_gradient_descent(int sample[M][2], double alpha, double *theta)
{
int i;
int count = 0;  /* iterations performed before convergence */
double h = 0;   /* model prediction for the current sample */
while(1)
{
//update theta: accumulate the gradient over the whole training set
double error_sum1 = 0;  /* partial derivative w.r.t. theta[0] */
double error_sum2 = 0;  /* partial derivative w.r.t. theta[1] */
for(i = 0; i < M; i++)
{
h = theta[0]+theta[1]*sample[i][0];
error_sum1 += (sample[i][1]-h)*1;
error_sum2 += (sample[i][1]-h)*sample[i][0];
}
theta[0] += alpha * error_sum1;
theta[1] += alpha * error_sum2;
if(is_convergence(sample,theta))
break;
count++;
}
/* fixed: both values were previously labeled "batch_theta" */
fprintf(stderr, "batch_theta0 = %f batch_theta1 = %f\n", theta[0],theta[1]);
fprintf(stderr, "batch_count = %d\n",count);
}
//随机梯度下降
//随机梯度下降 (stochastic gradient descent)
/* Fits h(x) = theta[0] + theta[1]*x by stochastic gradient descent:
 * each iteration updates theta from a single sample (chosen by a simple
 * deterministic stride of 3 — see todo), until is_convergence() passes.
 * sample: M (x, y) pairs; alpha: learning rate; theta: in/out parameters.
 * Prints the result and the iteration count to stderr. */
void stochastic_gradient(int sample[M][2], double alpha,double *theta)
{
double h = 0;    /* model prediction for the selected sample */
int random = 0;  /* stride counter used to pick the next sample */
int count = 0;   /* iterations performed before convergence */
while(1)
{
//update theta from one sample
int j = (random+=3) % M;//todo: replace with true random selection
h = theta[0]+theta[1]*sample[j][0];
theta[0] += alpha * (sample[j][1]-h);
theta[1] += alpha * (sample[j][1]-h)*sample[j][0];
if(is_convergence(sample,theta))
break;
count++;
}
/* fixed: second label was misspelled "stock_theta" */
fprintf(stderr, "stoch_theta = %f stoch_theta = %f\n", theta[0],theta[1]);
fprintf(stderr, "stoch_count = %d\n",count);
}
/* Demo driver: builds M samples of the exact line y = 4 + 11x, then fits
 * it with both batch and stochastic gradient descent, printing each
 * method's fitted parameters and iteration count to stderr.
 * NOTE: the original file had its entire content accidentally duplicated
 * and pasted inside main's body (nested function definitions — invalid C);
 * the duplicate has been removed. */
int main()
{
//prepare data: sample[i] = (x_i, y_i) with y = 4 + 11x
int sample[M][2];
int i;
for(i = 0; i < M; i++)
{
sample[i][0] = i;
sample[i][1] = 4+11*i; //4+11x
}
//alpha is very important: too large diverges, too small is very slow
double batch_alpha = 0.000006071;
double batch_theta[2]={1000,4000};
batch_gradient_descent(sample,batch_alpha,batch_theta);
double stoch_alpha = 0.00061;
double stoch_theta[2]={1000,-40000000};
stochastic_gradient(sample,stoch_alpha,stoch_theta);
return 0;
}
/* Reference closed-form solution via the normal equations, in MATLAB:
 *   x = [1:1:100]';
 *   y = 3*x + 4;
 *   x = reshape([ones(100,1); x], 100, 2);
 *   theta = inv(x'*x)*x'*y
 *   y = x * theta
 */