1.1 plotData.m
Just copy the code given in the exercise handout.
function plotData(X, y)
%PLOTDATA Plots the data points X and y into a new figure
% PLOTDATA(x,y) plots the data points with + for the positive examples
% and o for the negative examples. X is assumed to be a Mx2 matrix.
% Create New Figure
figure; hold on;
% ====================== YOUR CODE HERE ======================
% Instructions: Plot the positive and negative examples on a
% 2D plot, using the option 'k+' for the positive
% examples and 'ko' for the negative examples.
%
% Find Indices of Positive and Negative Examples
pos = find(y==1); neg = find(y == 0);
% Plot Examples
plot(X(pos, 1), X(pos, 2), 'k+','LineWidth', 2, ...
'MarkerSize', 7);
plot(X(neg, 1), X(neg, 2), 'ko', 'MarkerFaceColor', 'y', ...
'MarkerSize', 7);
% =========================================================================
hold off;
end
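As a quick smoke test, plotData can be called on a tiny made-up dataset (the numbers below are invented for illustration, not taken from the exercise data):

% Hypothetical smoke test for plotData
X = [34 78; 30 43; 36 72; 60 86; 79 75];   % two scores per example
y = [0; 0; 0; 1; 1];                       % 1 = positive, 0 = negative
plotData(X, y);
xlabel('Exam 1 score'); ylabel('Exam 2 score');
legend('Admitted', 'Not admitted');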
1.2.1 sigmoid.m
The logistic regression hypothesis is

h_\theta(x) = g(\theta^T x)

where g is the function defined below.
The logistic function, also called the sigmoid function, is

g(z) = \frac{1}{1 + e^{-z}}
function g = sigmoid(z)
%SIGMOID Compute sigmoid function
% g = SIGMOID(z) computes the sigmoid of z.
% You need to return the following variables correctly
g = zeros(size(z));
% ====================== YOUR CODE HERE ======================
% Instructions: Compute the sigmoid of each value of z (z can be a matrix,
% vector or scalar).
g = 1 ./ (1 + exp(-z));   % element-wise, so z can be a scalar, vector, or matrix
% =============================================================
end
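Because the implementation is element-wise, it accepts scalars, vectors, and matrices alike; a few sanity checks (expected values in the comments are rounded):

sigmoid(0)                  % 0.5
sigmoid([-1000 0 1000])     % approximately [0 0.5 1]
sigmoid(eye(2))             % [0.7311 0.5; 0.5 0.7311]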
1.2.2 costFunction.m
The cost function for logistic regression is

J(\theta) = \frac{1}{m}\sum_{i=1}^m\left[-y^{(i)}\log(h_\theta(x^{(i)})) - (1 - y^{(i)})\log(1 - h_\theta(x^{(i)}))\right]
The gradient (grad) is

\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^m\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}
function [J, grad] = costFunction(theta, X, y)
%COSTFUNCTION Compute cost and gradient for logistic regression
% J = COSTFUNCTION(theta, X, y) computes the cost of using theta as the
% parameter for logistic regression and the gradient of the cost
% w.r.t. to the parameters.
% Initialize some useful values
m = length(y); % number of training examples
% You need to return the following variables correctly
J = 0;
grad = zeros(size(theta));
% ====================== YOUR CODE HERE ======================
% Instructions: Compute the cost of a particular choice of theta.
% You should set J to the cost.
% Compute the partial derivatives and set grad to the partial
% derivatives of the cost w.r.t. each parameter in theta
%
% Note: grad should have the same dimensions as theta
%
h_theta = sigmoid(X * theta);    % hypothesis for all m examples at once
J = 1/m * (-y' * log(h_theta) - (1 - y)' * log(1 - h_theta));
grad = 1/m * X' * (h_theta - y); % vectorized gradient
% =============================================================
end
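With both J and grad returned, the parameters can be optimized with fminunc as the exercise script does; a sketch, assuming X already contains the leading column of ones:

% Minimize the cost, supplying our own gradient via 'GradObj'
options = optimset('GradObj', 'on', 'MaxIter', 400);
initial_theta = zeros(size(X, 2), 1);
[theta, cost] = fminunc(@(t) costFunction(t, X, y), initial_theta, options);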
1.2.4 predict.m
Predict the outcome: 1 for Admitted, 0 for Not Admitted.
function p = predict(theta, X)
%PREDICT Predict whether the label is 0 or 1 using learned logistic
%regression parameters theta
% p = PREDICT(theta, X) computes the predictions for X using a
% threshold at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1)
m = size(X, 1); % Number of training examples
% You need to return the following variables correctly
p = zeros(m, 1);
% ====================== YOUR CODE HERE ======================
% Instructions: Complete the following code to make predictions using
% your learned logistic regression parameters.
% You should set p to a vector of 0's and 1's
%
p = sigmoid(X * theta) >= 0.5;   % threshold at 0.5, as specified above
% =========================================================================
end
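Training accuracy is then just the fraction of examples where the prediction matches the label, for example:

p = predict(theta, X);
fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100);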
2.3 costFunctionReg.m
Compared with costFunction.m, the cost function adds a penalty on the magnitude of θ (the sum of squared parameters, scaled by λ/2m) to reduce overfitting:
J(\theta) = \frac{1}{m}\sum_{i=1}^m\left[-y^{(i)}\log(h_\theta(x^{(i)})) - (1 - y^{(i)})\log(1 - h_\theta(x^{(i)}))\right] + \frac{\lambda}{2m}\sum_{j=1}^n \theta_j^2
The gradient (grad) is

\frac{\partial J(\theta)}{\partial \theta_j} = \begin{cases} \frac{1}{m}\sum_{i=1}^m\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)} & \text{for } j = 0 \\ \left(\frac{1}{m}\sum_{i=1}^m\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}\right) + \frac{\lambda}{m}\theta_j & \text{for } j \geq 1 \end{cases}
Note that the first column of X is the constant 1, so the first element of the vector θ (28×1 in this exercise) is the bias term; θ(1,1) must be excluded from the regularization term when computing both the cost and the gradient.
function [J, grad] = costFunctionReg(theta, X, y, lambda)
%COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization
% J = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using
% theta as the parameter for regularized logistic regression and the
% gradient of the cost w.r.t. to the parameters.
% Initialize some useful values
m = length(y); % number of training examples
% You need to return the following variables correctly
J = 0;
grad = zeros(size(theta));
% ====================== YOUR CODE HERE ======================
% Instructions: Compute the cost of a particular choice of theta.
% You should set J to the cost.
% Compute the partial derivatives and set grad to the partial
% derivatives of the cost w.r.t. each parameter in theta
h_theta = sigmoid(X * theta);   % hypothesis, computed before theta is modified
theta(1,1) = 0;                 % zero the bias term so it is not regularized
J = 1/m * (-y' * log(h_theta) - (1 - y)' * log(1 - h_theta)) + ...
    lambda/2/m * (theta' * theta);
grad = (X' * (h_theta - y) + lambda * theta) / m;
% =============================================================
end
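A quick way to sanity-check the implementation: at θ = 0 the penalty term vanishes, so the regularized cost should equal the unregularized one, log(2) ≈ 0.693, since h_theta = 0.5 for every example. A minimal sketch, assuming X holds the 28 mapped features:

lambda = 1;
initial_theta = zeros(size(X, 2), 1);
[J, grad] = costFunctionReg(initial_theta, X, y, lambda);
% J should be about 0.693 regardless of lambda, because theta = 0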