Gradient descent can be used for regression as well as classification. Below is a minimal demonstration of the algorithm. The learning rate alpha strongly affects convergence: if it is too large the iteration diverges, and if it is too small the number of iterations needed grows.
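All three cases below fit the same line h(x) = theta(1) + theta(2)*x to the generated data. The two gradient-descent variants repeatedly apply the LMS update theta_j := theta_j + alpha*(y(i) - h(x(i)))*x_j(i), where x_0 = 1 for the intercept term, while case 3 solves the least-squares problem in closed form through the normal equations.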
clear; clc; data = genlineardata(13, -3.36, 100);   % a sketch of genlineardata is given at the end of the script
% Generate 100 points around the line y = 13 - 3.36*x, stored in data (100x3); the first column of data is all ones.
theta = [0; 1];   % initial guess: [intercept; slope]
times = 0;        % pass counter
alpha = 0.1;      % learning rate (reassigned inside cases 1 and 2)
[row, col] = size(data);
figure;
plot(data(:,2), data(:,3), 'r.');   % scatter the generated points
x = -5:5;                           % x range for plotting fitted lines
switch(3)   % choose method: 1 = stochastic GD, 2 = batch GD, 3 = normal equations
    case 1
        % Stochastic gradient descent: update theta after every single sample.
        while(1)
            y = theta(2)*x + theta(1);
            hold on;
            plot(x, y, 'r');                         % draw the current fit
            temp0 = theta(1);
            temp1 = theta(2);
            for i = 1:row
                h = theta(1) + theta(2)*data(i,2);   % prediction for sample i
                alpha = 0.2/i;                       % step size decays within the pass
                sum0 = data(i,3) - h;                % residual
                sum1 = (data(i,3) - h)*data(i,2);
                theta(1) = theta(1) + alpha*sum0;
                theta(2) = theta(2) + alpha*sum1;
            end
            % Stop when the parameters barely move, or after 20 passes.
            distant = abs(theta(1)-temp0) + abs(theta(2)-temp1);
            if(distant < 0.001)
                break;
            end
            times = times + 1;
            if(times > 20)
                break;
            end
        end
    case 2
        % Batch gradient descent: accumulate the gradient over all samples,
        % then take one step per pass.
        while(1)
            y = theta(2)*x + theta(1);
            hold on;
            plot(x, y, 'r');
            temp0 = theta(1);
            temp1 = theta(2);
            sum0 = 0;
            sum1 = 0;
            times = times + 1;
            alpha = 1/(times*row);   % step size decays with the pass count
            for i = 1:row
                h = theta(1) + theta(2)*data(i,2);
                sum0 = sum0 + alpha*(data(i,3) - h);
                sum1 = sum1 + alpha*(data(i,3) - h)*data(i,2);
            end
            theta(1) = theta(1) + sum0;
            theta(2) = theta(2) + sum1;
            distant = abs(theta(1)-temp0) + abs(theta(2)-temp1);
            if(distant < 0.001)
                break;
            end
            if(times > 20)
                break;
            end
        end
    case 3
        % Least squares revisited: minimizing ||Y - X*theta||^2 gives the
        % normal equations X'*X*theta = X'*Y, solved here in closed form.
        y = theta(2)*x + theta(1);
        hold on;
        plot(x, y, 'r');              % initial guess, for comparison
        X = data(:,1:2);              % design matrix [1, x]
        Y = data(:,3);
        theta = (X'*X)\(X'*Y);        % backslash avoids an explicit inverse
end
theta(1)   % learned intercept
theta(2)   % learned slope
times      % number of passes taken
y = theta(2)*x + theta(1);
hold on;
plot(x, y, 'g');   % final fit, drawn in green
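The genlineardata helper is not shown in the original post; below is a minimal sketch of what it might look like, assuming it returns rows of [1, x, y] with Gaussian noise around y = b + k*x (the x range and noise level here are guesses):
function data = genlineardata(b, k, n)
    % Hypothetical reconstruction: n noisy samples of y = b + k*x, as [1, x, y] rows.
    x = 10*rand(n,1) - 5;        % sample x uniformly in [-5, 5] (assumed range)
    y = b + k*x + randn(n,1);    % line plus unit Gaussian noise (assumed level)
    data = [ones(n,1), x, y];    % first column all ones, matching the comment above
end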