% Single sigmoid unit mapping R^m -> R^1, trained by batch gradient descent.
%
% Activation function: scaled logistic b./(1+exp(-a*x)), with slope a and
% output amplitude b.
sigmoid=@(x,a,b)b./(1+exp(-a*x));

% ---- input : X,T ----
% X is m-by-N (one sample per column), T is 1-by-N (one target per sample).
x1=linspace(-1,1,5);x2=x1;
[x2,x1]=meshgrid(x2,x1);              % 5-by-5 grid over [-1,1] x [-1,1]
X=[x1(:).';x2(:).'];                  % grid points flattened to 2-by-25
T=sigmoid(3*X(1,:)+2*X(2,:)-1,3,1);   % targets produced by a known sigmoid unit
% or X=[[1;1],[1;0],[0;1],[0;0]];T=[1,0,0,0];

% ---- set the options ----
tol=0.001;   % training stops once norm(T-Y) <= tol
lr=0.1;      % learning rate
% alpha=0.2;

% ---- initial state ----
[m,N]=size(X);
W=rand(1,m);theta=rand(1);   % random start (rand is not seeded here, so runs differ)
b=1;a=1;
z=W*X-theta;                 % pre-activation; scalar theta applies to every sample
Y=sigmoid(z,a,b);
E=T-Y;
k=0;
%DW0=zeros(1,m);DT0=0;
%Da0=0;Db0=0;

% ---- batch gradient descent on 0.5*sum((T-Y).^2) ----
% With Y = b./(1+exp(-a*z)) and z = W*X-theta:
%   dY/dz = (a/b)*Y.*(b-Y)     (used for W and theta; dz/dtheta = -1)
%   dY/da = z.*Y.*(b-Y)/b
%   dY/db = Y/b
% At most 10000 iterations are performed.
while(norm(E)>tol&&k<10000)
    % lr=0.01/(1+k/500);
    delta=(T-Y).*Y.*(b-Y);      % error term shared by the W, theta and a updates
    DW=lr*a/b*delta*X.';
    DT=lr*a/b*sum(delta,2);
    Db=lr*sum((T-Y).*Y,2)/b;
    % The slope gradient needs the full pre-activation z = W*X-theta;
    % using W*X alone drops the threshold and gives a wrong step for a.
    Da=lr*sum(delta.*z,2)/b;
    W=W+DW;
    theta=theta-DT;             % minus sign because z depends on -theta
    a=a+Da; b=b+Db;
    z=W*X-theta;
    Y=sigmoid(z,a,b);
    E=T-Y;
    k=k+1;
end

% ---- plot the fitted surface on a finer 20-by-20 grid ----
x1=linspace(-1,1,20);x2=x1;
[x2,x1]=meshgrid(x2,x1);
y=sigmoid(W(1)*x1+W(2)*x2-theta,a,b);
mesh(x1,x2,y);hold on;          % hold on keeps this surface for later plot calls
%
% This section repeats the setup and training of the preceding section from a
% fresh random initialisation. The earlier section leaves `hold on` active, so
% the surface plotted here is drawn over the earlier one.

% Activation function: scaled logistic b./(1+exp(-a*x)), with slope a and
% output amplitude b.
sigmoid=@(x,a,b)b./(1+exp(-a*x));

% ---- input : X,T ----
% X is m-by-N (one sample per column), T is 1-by-N (one target per sample).
x1=linspace(-1,1,5);x2=x1;
[x2,x1]=meshgrid(x2,x1);              % 5-by-5 grid over [-1,1] x [-1,1]
X=[x1(:).';x2(:).'];                  % grid points flattened to 2-by-25
T=sigmoid(3*X(1,:)+2*X(2,:)-1,3,1);   % targets produced by a known sigmoid unit
% or X=[[1;1],[1;0],[0;1],[0;0]];T=[1,0,0,0];

% ---- set the options ----
tol=0.001;   % training stops once norm(T-Y) <= tol
lr=0.1;      % learning rate
% alpha=0.2;

% ---- initial state ----
[m,N]=size(X);
W=rand(1,m);theta=rand(1);   % random start (rand is not seeded here, so runs differ)
b=1;a=1;
z=W*X-theta;                 % pre-activation; scalar theta applies to every sample
Y=sigmoid(z,a,b);
E=T-Y;
k=0;
%DW0=zeros(1,m);DT0=0;
%Da0=0;Db0=0;

% ---- batch gradient descent on 0.5*sum((T-Y).^2) ----
% With Y = b./(1+exp(-a*z)) and z = W*X-theta:
%   dY/dz = (a/b)*Y.*(b-Y)     (used for W and theta; dz/dtheta = -1)
%   dY/da = z.*Y.*(b-Y)/b
%   dY/db = Y/b
% At most 10000 iterations are performed.
while(norm(E)>tol&&k<10000)
    % lr=0.01/(1+k/500);
    delta=(T-Y).*Y.*(b-Y);      % error term shared by the W, theta and a updates
    DW=lr*a/b*delta*X.';
    DT=lr*a/b*sum(delta,2);
    Db=lr*sum((T-Y).*Y,2)/b;
    % The slope gradient needs the full pre-activation z = W*X-theta;
    % using W*X alone drops the threshold and gives a wrong step for a.
    Da=lr*sum(delta.*z,2)/b;
    W=W+DW;
    theta=theta-DT;             % minus sign because z depends on -theta
    a=a+Da; b=b+Db;
    z=W*X-theta;
    Y=sigmoid(z,a,b);
    E=T-Y;
    k=k+1;
end

% ---- plot the fitted surface (20-by-20 grid) and the training targets ----
x1=linspace(-1,1,20);x2=x1;
[x2,x1]=meshgrid(x2,x1);
y=sigmoid(W(1)*x1+W(2)*x2-theta,a,b);
mesh(x1,x2,y);hold on;
plot3(X(1,:),X(2,:),T,'ro');    % training targets as red circles
xlabel('x_1');ylabel('x_2');title('Y/T');