ex8
github:https://github.com/DLW3D/coursera-machine-learning-ex
练习文件下载地址:https://s3.amazonaws.com/spark-public/ml/exercises/on-demand/machine-learning-ex8.zip
Anomaly Detection 异常检测
Multivariate Gaussian 多元高斯分布
estimateGaussian.m
multivariateGaussian.m
function p = multivariateGaussian(X, mu, Sigma2)
%MULTIVARIATEGAUSSIAN Density of a multivariate normal at each row of X.
%   p = MULTIVARIATEGAUSSIAN(X) estimates the per-feature mean and
%   (population) variance from X itself and evaluates the density.
%
%   p = MULTIVARIATEGAUSSIAN(X, mu, Sigma2) uses the supplied parameters.
%   mu is a vector with one entry per column of X. Sigma2 is either a
%   vector of per-feature variances (treated as a diagonal covariance) or
%   a full covariance matrix.
%
%   Returns p, a column vector with one density value per row of X.

% Fall back to estimating the parameters from X when they are not given.
% The explicit dimension argument keeps a single-row X from being
% collapsed to a scalar by mean/var.
if nargin < 2 || isempty(mu)
    mu = mean(X, 1)';
end
if nargin < 3 || isempty(Sigma2)
    % Normalised by the number of rows (weight flag 1), not rows - 1.
    Sigma2 = var(X, 1, 1)';
end

k = length(mu);

% A vector of variances stands for a diagonal covariance matrix.
if (size(Sigma2, 2) == 1) || (size(Sigma2, 1) == 1)
    Sigma2 = diag(Sigma2);
end

% Centre the data on the mean.
X = bsxfun(@minus, X, mu(:)');

% Evaluate the density row by row; the row-wise sum yields the quadratic
% form x' * pinv(Sigma2) * x for every example.
p = (2 * pi) ^ (- k / 2) * det(Sigma2) ^ (-0.5) * ...
    exp(-0.5 * sum(bsxfun(@times, X * pinv(Sigma2), X), 2));
end
Select Threshold 选择阈值
selectThreshold.m
function [bestEpsilon bestF1] = selectThreshold(yval, pval)
%SELECTTHRESHOLD Pick the density threshold that maximises the F1 score.
%   [bestEpsilon, bestF1] = SELECTTHRESHOLD(yval, pval) scans thresholds
%   from min(pval) to max(pval) in steps of 1/1000 of that range, flags
%   every entry with pval < epsilon as an anomaly, and scores the flags
%   against the labels in yval. It returns the first threshold reaching
%   the highest F1 and that F1 value; both stay 0 if no threshold scores
%   above 0.

bestEpsilon = 0;
bestF1 = 0;

lowest = min(pval);
highest = max(pval);
step = (highest - lowest) / 1000;

for candidate = lowest:step:highest
    flagged = (pval < candidate);
    truePos = sum(yval & flagged);

    prec = truePos / sum(flagged);
    rec = truePos / sum(yval);
    score = 2 * prec * rec / (prec + rec);

    % A NaN score (from 0/0) never passes this comparison, so such
    % thresholds are ignored.
    if score > bestF1
        bestF1 = score;
        bestEpsilon = candidate;
    end
end
end
效果可视化
% Driver script: fit a Gaussian to the training data, pick a threshold on
% the validation data, and circle the training points flagged as outliers.
%
% The following command loads the dataset. You should now have the
% variables X, Xval, yval in your environment
load('ex8data1.mat');
fprintf('Visualizing Gaussian fit.\n\n');
% Estimate mu and sigma2
[mu, sigma2] = estimateGaussian(X);
% Returns the density of the multivariate normal at each data point (row)
% of X
% NOTE(review): this call passes mu and sigma2 explicitly; make sure the
% multivariateGaussian on the path accepts three arguments.
p = multivariateGaussian(X, mu, sigma2);
% Visualize the fit
visualizeFit(X, mu, sigma2);
xlabel('Latency (ms)');
ylabel('Throughput (mb/s)');
% Evaluate the same model on the validation set and use the labelled
% validation data to choose the threshold epsilon
pval = multivariateGaussian(Xval, mu, sigma2);
[epsilon F1] = selectThreshold(yval, pval);
% Find the outliers in the training set: indices of the rows of X whose
% density p is below epsilon
outliers = find(p < epsilon);
% Draw a red circle around those outliers
% (hold on keeps the existing figure so the circles are drawn over it)
hold on
plot(X(outliers, 1), X(outliers, 2), 'ro', 'LineWidth', 2, 'MarkerSize', 10);
hold off
Collaborative Filtering 协同过滤
Cost Function And Gradient 代价函数和梯度
cofiCostFunc.m
function [J, grad] = cofiCostFunc(params, Y, R, num_users, num_movies, ...
                                  num_features, lambda)
%COFICOSTFUNC Collaborative filtering cost function
%   [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ...
%   num_features, lambda) returns the regularised cost and gradient for
%   the collaborative filtering problem.
%
%   params       unrolled parameters: the first num_movies*num_features
%                entries are X, the remainder is Theta
%   Y            num_movies x num_users matrix of ratings
%   R            num_movies x num_users mask multiplied element-wise into
%                the prediction error (zero entries drop out of the cost)
%   lambda       regularisation strength
%
%   J            scalar cost
%   grad         column vector [X_grad(:); Theta_grad(:)], laid out the
%                same way as params

% Unfold the X and Theta matrices from params
X = reshape(params(1:num_movies*num_features), num_movies, num_features);
Theta = reshape(params(num_movies*num_features+1:end), ...
                num_users, num_features);

% Masked prediction error (movies x users). It is shared by the cost and
% both gradients, so compute it once.
err = (X * Theta' - Y) .* R;

% Half the squared error plus the L2 penalty on both parameter matrices.
% The (:) sums always reduce to a scalar, whatever the matrix shape.
J = sum(err(:) .^ 2) / 2 ...
    + (lambda / 2) * (sum(Theta(:) .^ 2) + sum(X(:) .^ 2));

% Gradients with their regularisation terms
X_grad = err * Theta + lambda * X;          % movies x features
Theta_grad = err' * X + lambda * Theta;     % users x features

grad = [X_grad(:); Theta_grad(:)];
end