例1
clear,clc
%% 数据准备
x1 = [7 1 11 11 7 11 3 1 2 21 1 11 10]';
x2 = [26,29,56,31,52,55,71,31,54,47,40,66,68]';
x3 = [6 15,8,8,6,9,17,22,18,4,23,9,8]';
x4 = [60,52,20,47,33,22,6,44,22,26,34,12,12]';
y = [78.5 74.3 104.3 87.6 95.9 109.2 102.7 72.5 93.1 115.9 83.8 113.3 109.4]';
x = [x1,x2,x3,x4];
choose = 10; % 选择的个数
x_train = x(1:choose,:); % 训练集
y_train = y(1:choose,:);
x_test = x(choose+1:end,:); % 测试集
y_test = y(choose+1:end,:);
%% 检验
corr([x,y]) % 相关系数矩阵
%% 原始数据绘图
for i = 1:4
subplot(2,2,i)
plot(x(:,i),y,'k.','markersize',10)
title('原始数据绘图')
end
%% 拟合
mdl1 = fitlm(x_train,y_train);
mdl1
figure
mdl1.plot
%% 回归诊断
Res = mdl1.Residuals;
Res_Stu = Res.Studentized;
Res_stan = Res.Standardized;
% 查看残差图找出异常点
figure
subplot(1,2,1)
plot(Res_Stu,'kx')
refline(0,-2)
refline(0,2)
subplot(1,2,2)
plot(Res_stan,'kx')
refline(0,2)
refline(0,-2)
% 排除异常点并重新拟合
id = find(abs(Res_Stu)>2);
mdl2 = LinearModel.fit(x_train,y_train,'Exclude',id); % 排除异常点的拟合
mdl2
figure
mdl2.plot
%% 稳健回归(使用加权最小二乘)
mdl3 = LinearModel.fit(x_train,y_train,'RobustOpts','on');
mdl3
figure
mdl3.plot
%% 预测
% 原始数据
y_test1 = mdl1.predict(x_test);
% 去除异常值
y_test2 = mdl2.predict(x_test);
% 稳健回归
y_test3 = mdl3.predict(x_test);
figure
plot(1:length(y_test),y_test,'ko');
hold on
plot(1:length(y_test),y_test1,'r+');
plot(1:length(y_test),y_test2,'bx');
plot(1:length(y_test),y_test3,'g*');
xlabel('预测数据的序号')
ylabel('Y')
title('三个模型预测结果的比较')
legend('真实值','原始数据回归直线','去除异常值后的回归直线','稳健回归直线','location','best')
xlim([0,4])
例2
clear,clc
y1 = [104.9 105.8 107.7 106.9 108.1 111.3 115.3 118.1 116.9 117.8 118.4 116.9 118.3 123.5 121.4];
y2 = [104.9 105.3 106.1 106.3 106.7 107.4 108.5 109.7 110.5 111.2 111.9 112.3 118.3 121 121.1];
t = 2:length(y1)+1;
plot(t,y1,'k.','markersize',12)
xlabel('时间序号')
ylabel('全国消费指数')
title('原始数据图形')
%% 拟合
n = 6;
[p, S, mu] = polyfit(t, y1, n);
[y, delta] = polyval(p, t, S, mu);
%% 绘图
figure
plot(t,y1,'k.','markersize',12)
hold on
plot(t,y,'r')
xlabel('时间序号')
ylabel('全国消费指数')
title('原始数据图形')
legend('原始数据','多项式拟合直线','location','best')
r = poly2sym(p,sym('t'));
%% 回归类型判断
figure
plot(t,y2,'k.','markersize',12)
xlabel('时间序号')
ylabel('全国商品零售价格分类指数')
title('原始数据图形')
%% 参数估计
y2_new = log(y2);
corrcoef(t,y2_new) % 线性相关性检验
fun2 = @(beta,x) beta(1).*exp(beta(2)*x);
beta = [1,1];
nlm1 = NonLinearModel.fit(t,y2,fun2,beta);
nlm1
%% 绘制曲线
tnew = linspace(2, 16, 50)';
ynew2 = nlm1.predict(tnew);
plot(t, y2, 'k.','markersize',12);
hold on;
plot(tnew, ynew2, 'r-');
xlabel('时间序号(t)');
ylabel('全国商品零售价格分类指数(y)');
legend('原始数据点', '指数函数拟合回归曲线','location','best');
%% 异常值诊断
Res2 = nlm1.Residuals;
Res_Stu2 = Res2.Studentized;
Res_stan2 = Res2.Standardized;
% 查看残差图找出异常点
figure
subplot(1,2,1)
plot(Res_Stu2,'kx')
refline(0,-2)
refline(0,2)
subplot(1,2,2)
plot(Res_stan2,'kx')
refline(0,2)
refline(0,-2)
id2 = find(abs(Res_Stu2)>2);
%% 模型改进
tt = t;
tt(id2) = [];
y22 = y2;
y22(id2) = [];
nlm2 = NonLinearModel.fit(tt, y22, fun2, beta);
nlm2
%% 改进前后对比
figure
tnew = linspace(2, 16, 50)';
ynew2 = nlm1.predict(tnew);
ynew22 = nlm2.predict(tnew);
plot(t, y2, 'k.','markersize',12);
hold on;
plot(tnew, ynew2, 'r-', tnew, ynew22, 'g-.');
xlabel('时间序号(t)');
ylabel('全国商品零售价格分类指数(y)');
legend('原始数据点', '指数函数拟合回归曲线', '改进的指数函数拟合回归曲线');