4. Algorithm Diagnostics

【References】
Andrew Ng's Machine Learning on Coursera, ex5
https://blog.csdn.net/LilyNothing/article/details/52291686
【Model Selection】
% I. Computing the metrics
% 1. Cost function (regularized linear regression)
function [J, grad] = costFunction(theta, X, y, lambda)
    % Vectorized cost and gradient; theta(1) is the bias term and is
    % excluded from regularization.
    m = size(X, 1);
    grad = X' * (X * theta - y) / m;
    grad(2:end) = grad(2:end) + lambda/m * theta(2:end);
    J = (X * theta - y)' * (X * theta - y) / (2*m) + lambda/(2*m) * sum(theta(2:end).^2);
end
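For reference, the quantities computed above are the regularized cost and its gradient (the bias term $\theta_0$ is not regularized):

$$J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\left(\theta^T x^{(i)} - y^{(i)}\right)^2 + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

$$\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(\theta^T x^{(i)} - y^{(i)}\right)x_j^{(i)} + \frac{\lambda}{m}\theta_j \qquad (j \geq 1)$$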

% 2. Fast model training with an advanced optimizer
function [theta] = trainLinearReg(X, y, lambda)
    % fmincg is the conjugate-gradient minimizer shipped with the
    % course materials; it takes the same arguments as fminunc.
    initial_theta = zeros(size(X, 2), 1);
    costFunc = @(t) costFunction(t, X, y, lambda);
    options = optimset('MaxIter', 200, 'GradObj', 'on');
    theta = fmincg(costFunc, initial_theta, options);
end
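If fmincg is not on your path, the same training step works with the built-in fminunc, which accepts the identical cost handle and options (a drop-in sketch, not part of the original exercise code):

% Alternative: Octave/MATLAB's built-in unconstrained minimizer
theta = fminunc(costFunc, initial_theta, options);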

% 3. Computing the learning curve
function [error_train, error_val] = learningCurve(X, y, Xval, yval, lambda)
    m = size(X, 1);
    error_train = zeros(m, 1);
    error_val   = zeros(m, 1);
    for i = 1:m
        % Train on the first i examples (with regularization), but
        % measure both errors without the regularization term (lambda = 0)
        theta = trainLinearReg(X(1:i,:), y(1:i), lambda);
        error_train(i) = costFunction(theta, X(1:i,:), y(1:i), 0);
        error_val(i)   = costFunction(theta, Xval, yval, 0);
    end
end

% 4. Adding polynomial features to fix underfitting
function [X_poly] = polyFeatures(X, p)
    % Map the column vector X to its powers 1..p, one power per column
    m = size(X, 1);
    X_poly = zeros(m, p);
    for i = 1:p
        X_poly(:, i) = X .^ i;
    end
end
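As a quick sanity check (values computed by hand):

X_poly = polyFeatures([2; 3], 3)   % returns [2 4 8; 3 9 27]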

% 5. Selecting lambda
function [lambda_vec, error_train, error_val] = validationCurve(X, y, Xval, yval)
    lambda_vec = [0 0.001 0.003 0.01 0.03 0.1 0.3 1 3 10]';
    m = length(lambda_vec);
    error_train = zeros(m, 1);
    error_val = zeros(m, 1);
    for i = 1:m
        % Train with each candidate lambda; report unregularized errors
        lambda = lambda_vec(i);
        theta = trainLinearReg(X, y, lambda);
        error_train(i) = costFunction(theta, X, y, 0);
        error_val(i)   = costFunction(theta, Xval, yval, 0);
    end
end

% II. Analyzing the metrics
% 1. Load and plot the data
cd D:\study\AI\data\ex5
load ('ex5data1.mat'); % provides X, y, Xval, yval, Xtest, ytest
plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5);
xlabel('Change in water level (x)');
ylabel('Water flowing out of the dam (y)');

% 2. Verify the cost function against the expected values
m = size(X, 1);
theta = [1 ; 1];
lambda = 1;
[J, grad] = costFunction(theta, [ones(m, 1) X], y, lambda);
fprintf('Cost at theta = [1 ; 1]: %f (expected: 303.993192)\n', J);
fprintf('Gradient at theta = [1 ; 1]: [%f; %f] (expected: [-15.303016; 598.250744])\n', grad(1), grad(2));

% 3. Fit the training set (lambda = 0) and plot the fitted line
lambda = 0;
[theta] = trainLinearReg([ones(m, 1) X], y, lambda);
plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5);
xlabel('Change in water level (x)');
ylabel('Water flowing out of the dam (y)');
hold on;
plot(X, [ones(m, 1) X]*theta, '--', 'LineWidth', 2)
hold off;

% 4. Learning curve: with a straight-line fit both errors stay high,
% the signature of high bias (underfitting)
lambda = 0;
[error_train, error_val] = learningCurve([ones(m, 1) X], y, [ones(size(Xval, 1), 1) Xval], yval, lambda);
plot(1:m, error_train, 1:m, error_val);
title('Learning curve for linear regression')
legend('Train', 'Cross Validation')
xlabel('Number of training examples')
ylabel('Error')
axis([0 13 0 150])
fprintf('# Training Examples\tTrain Error\tCross Validation Error\n');
for i = 1:m
    fprintf('  \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i));
end

% 5. Add polynomial features (p = 8) to fix underfitting
p = 8;
X_poly = polyFeatures(X, p);
[X_poly, mu, sigma] = featureNormalize(X_poly);  % Normalize
X_poly = [ones(m, 1), X_poly];                   % Add Ones

% Map X_poly_test and normalize (using mu and sigma)
X_poly_test = polyFeatures(Xtest, p);
X_poly_test = bsxfun(@minus, X_poly_test, mu);
X_poly_test = bsxfun(@rdivide, X_poly_test, sigma);
X_poly_test = [ones(size(X_poly_test, 1), 1), X_poly_test];         % Add Ones

% Map X_poly_val and normalize (using mu and sigma)
X_poly_val = polyFeatures(Xval, p);
X_poly_val = bsxfun(@minus, X_poly_val, mu);
X_poly_val = bsxfun(@rdivide, X_poly_val, sigma);
X_poly_val = [ones(size(X_poly_val, 1), 1), X_poly_val];           % Add Ones

fprintf('Normalized Training Example 1:\n');
fprintf('  %f  \n', X_poly(1, :));
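featureNormalize above comes from the ex5 starter code and is not reproduced in this post; a minimal sketch of what it does (z-score each column, returning the training-set mu and sigma so the identical transform can be reused on the validation and test sets):

function [X_norm, mu, sigma] = featureNormalize(X)
    % Z-score normalization: center each column on its mean, then
    % scale by its standard deviation
    mu = mean(X);
    X_norm = bsxfun(@minus, X, mu);
    sigma = std(X_norm);
    X_norm = bsxfun(@rdivide, X_norm, sigma);
end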


% 6. Learning curve after adding features: training error is near zero
% while validation error stays high -> high variance (overfitting at lambda = 0)
lambda = 0;
[theta] = trainLinearReg(X_poly, y, lambda);

% Plot training data and fit
figure(1);
plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5);
plotFit(min(X), max(X), mu, sigma, theta, p);
xlabel('Change in water level (x)');
ylabel('Water flowing out of the dam (y)');
title (sprintf('Polynomial Regression Fit (lambda = %f)', lambda));
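plotFit is also a starter-code helper; a sketch consistent with how it is used here (sample x densely over a slightly extended range, apply the same polynomial mapping and normalization as the training data, then draw the fitted curve):

function plotFit(min_x, max_x, mu, sigma, theta, p)
    % Draw the learned polynomial over a range a bit wider than the
    % training data, reusing the training-set mu and sigma
    hold on;
    x = (min_x - 15 : 0.05 : max_x + 25)';
    X_poly = polyFeatures(x, p);
    X_poly = bsxfun(@minus, X_poly, mu);
    X_poly = bsxfun(@rdivide, X_poly, sigma);
    X_poly = [ones(size(x, 1), 1), X_poly];
    plot(x, X_poly * theta, '--', 'LineWidth', 2);
    hold off;
end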

figure(2);
[error_train, error_val] = learningCurve(X_poly, y, X_poly_val, yval, lambda);
plot(1:m, error_train, 1:m, error_val);

title(sprintf('Polynomial Regression Learning Curve (lambda = %f)', lambda));
xlabel('Number of training examples')
ylabel('Error')
axis([0 13 0 100])
legend('Train', 'Cross Validation')

fprintf('Polynomial Regression (lambda = %f)\n\n', lambda);
fprintf('# Training Examples\tTrain Error\tCross Validation Error\n');
for i = 1:m
    fprintf('  \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i));
end

% 7. Effect of selecting lambda: sweep the candidates and compare
% training and cross-validation error
[lambda_vec, error_train, error_val] = validationCurve(X_poly, y, X_poly_val, yval);
close all;
plot(lambda_vec, error_train, lambda_vec, error_val);
legend('Train', 'Cross Validation');
xlabel('lambda');
ylabel('Error');

fprintf('lambda\t\tTrain Error\tValidation Error\n');
for i = 1:length(lambda_vec)
    fprintf(' %f\t%f\t%f\n',lambda_vec(i), error_train(i), error_val(i));
end
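To pick the winner programmatically rather than by eye (a one-liner over the arrays returned above; for this dataset the cross-validation error bottoms out around lambda = 3):

[~, idx] = min(error_val);
best_lambda = lambda_vec(idx)   % lambda with the lowest validation error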
