数据产生
$$x_1=[\mathrm{rand}(1,20),\ 3+\mathrm{rand}(1,20)]$$
（注：下文代码中第二类样本的偏移量取 1.5 而非 3。）
$$x_2=[\mathrm{rand}(1,20),\ 3+\mathrm{rand}(1,20)]$$
$$y=[\mathrm{ones}(1,20),\ \mathrm{zeros}(1,20)]$$
假设
$$
\begin{aligned}
h_\Theta(X) &= g(\Theta X) \\
\Theta &= [\theta_0,\ \theta_1,\ \theta_2] \\
X &= [x_0,\ x_1,\ x_2]^T \\
x_0 &= \mathrm{ones}(1,m) \\
g(z) &= \frac{1}{1+e^{-z}}
\end{aligned}
$$
其中 $m$ 为数据个数。
代价函数
$$J(\Theta)=\frac{1}{m}\sum_{i=1}^{m}\mathrm{Cost}\left(h_\theta(x^{(i)}),\ y^{(i)}\right)$$
$$\mathrm{Cost}\left(h_\theta(x^{(i)}),\ y^{(i)}\right)=-y^{(i)}\log\left(h_\theta(x^{(i)})\right)-\left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)$$
梯度下降
$$
\begin{aligned}
\Theta &:= \Theta-\alpha\frac{\partial J}{\partial \Theta} \\
&= \Theta-\frac{\alpha}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)\left(x^{(i)}\right)^{T}
\end{aligned}
$$
偏导求解过程
代码
% A highlighted block
% Logistic-regression demo: generates two random 2-D clusters, fits a
% logistic model with batch gradient descent, then plots the data, the
% fitted linear score surface, the decision boundary and the cost history.
clear;
close all;

% Logistic (sigmoid) function g(z) = 1/(1+exp(-z)), applied element-wise.
sigmoid = @(z) 1./(1+exp(-z));

% Synthetic data: the first 20 samples are drawn from [0,1]^2 and labelled 1,
% the next 20 from [1.5,2.5]^2 and labelled 0.
x1 = [rand(1,20) 1.5+rand(1,20)];
x2 = [rand(1,20) 1.5+rand(1,20)];
m = length(x1);
y = [ones(1,m/2), zeros(1,m/2)];

figure(1);
hold on;
plot3(x1, x2, y, 'r*');

% Hypothesis: h(x) = g(theta0 + theta1*x1^2 + theta2*x2)
%   (x1 enters squared, x2 enters linearly -- this matches X below and every
%   plotting expression further down).
% Cost: J(theta) = 1/m * sum(Cost(h(x),y)),
%       Cost(h(x),y) = -y*log(h(x)) - (1-y)*log(1-h(x))
Theta = rand(1,3);
X = [ones(1,m); x1.^2; x2];   % 3-by-m design matrix, one column per sample

numIter = 1000;               % number of gradient-descent iterations
alpha = 1;                    % learning rate (the update previously used an implicit 1)
J = zeros(1, numIter);        % preallocate the cost history instead of growing it

for n = 1:numIter
    hx = sigmoid(Theta*X);
    d = hx - y;

    % Clamp only for the cost evaluation so a saturated sigmoid cannot
    % produce 0*log(0) = NaN; the gradient below uses the unclamped hx.
    hxSafe = min(max(hx, eps), 1 - eps);
    J(n) = 1/m * sum(-y.*log(hxSafe) - (1-y).*log(1-hxSafe));

    % d is 1-by-m and X is 3-by-m, so d*X.' is the 1-by-3 sum over samples
    % of d(i)*X(:,i).' -- the same quantity as sum(transpose(d.*X)) but
    % without relying on implicit expansion.
    Theta = Theta - (alpha/m) * (d * X.');
end

% Surface of the linear score z = Theta*[1; x1^2; x2] over a grid.
[x1_temp1, x2_temp1] = meshgrid(0:0.1:3);
z_temp1 = Theta(1) + Theta(2)*x1_temp1.^2 + Theta(3)*x2_temp1;
mesh(x1_temp1, x2_temp1, z_temp1);

% The same score evaluated along the diagonal x1 = x2.
x1_temp2 = 0:0.1:3;
x2_temp2 = 0:0.1:3;
z_temp2 = Theta(1) + Theta(2)*x1_temp2.^2 + Theta(3)*x2_temp2;
plot3(x1_temp2, x2_temp2, z_temp2);

% Decision boundary: the score is zero where
% x2 = (-Theta(2)*x1^2 - Theta(1)) / Theta(3).
x1_temp3 = 1:0.1:1.5;
x2_temp3 = (-Theta(2)*x1_temp3.^2 - Theta(1)) / Theta(3);
plot(x1_temp3, x2_temp3);
hold off;

% Cost history over the iterations.
figure(2);
plot(1:numIter, J);