波士顿房价(Lasso、线性回归、留出法、k折交叉验证法)

经过几天粗浅的学习,我利用波士顿房价数据集做了一点小练习,并写下这篇笔记来记录自己的点滴实验心得。新手实验,望有经验人士勿喷;本人抛砖引玉,希望得到宝贵建议。今后如有新的体会,会更新本笔记。

1、线性回归+留出法与lasso+留出法

# Boston house prices: compare LinearRegression and Lasso with a hold-out split.
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

# --- Load the data set ---
# NOTE(review): load_boston appears to be deprecated/removed in recent
# scikit-learn releases -- confirm against the installed version.
house_price = datasets.load_boston()
house_price_feature, house_price_target = house_price.data, house_price.target

# --- Hold-out method: 30% of the samples are reserved for testing ---
# No random_state is passed, so the split (and every number below) changes
# from run to run.
(
    house_price_feature_train,
    house_price_feature_test,
    house_price_target_train,
    house_price_target_test,
) = train_test_split(house_price_feature, house_price_target, test_size=0.3)

# --- Create and train both regressors (default hyper-parameters) ---
LR_model = LinearRegression()
LA_model = Lasso()
for model in (LR_model, LA_model):
    model.fit(house_price_feature_train, house_price_target_train)

# --- Predict on the held-out samples ---
predict_results_LR = LR_model.predict(house_price_feature_test)
predict_results_LA = LA_model.predict(house_price_feature_test)

# --- Metrics: MSE, MAE and R^2 on the test split, LR first, then Lasso ---
for tag, predicted in (("LR", predict_results_LR), ("LA", predict_results_LA)):
    print(f"{tag}'s MSE is ", mean_squared_error(house_price_target_test, predicted))
    print(f"{tag}'s MAE is ", mean_absolute_error(house_price_target_test, predicted))
    print(f"{tag}'s R Squared is ", r2_score(house_price_target_test, predicted))

# --- Plots: predicted (red dashed) vs. real (green solid) target values ---
# One x position per test sample; both prediction arrays have the same length.
x = np.linspace(0, predict_results_LA.size, predict_results_LA.size)
for figure_title, predicted in (
    ("linear", predict_results_LR),
    ("lasso", predict_results_LA),
):
    plt.plot(x, predicted, "r--", label="predict", alpha=0.5)
    plt.plot(x, house_price_target_test, "g-", label="real", alpha=0.5)
    plt.legend(loc="best")
    # To keep a copy of the figure, call
    # plt.savefig(<path>, dpi=500, bbox_inches='tight') here, before show().
    plt.title(figure_title)
    plt.show()

得到线性回归预测(留出法)图与lasso预测(留出法)图以及各项指标数据:
线性回归预测(留出法)图
线性回归预测(留出法)图

在这里插入图片描述
lasso预测(留出法)图

留出法指标数据
指标数据图1

从各项指标数据可以看出,线性回归+留出法的各项指标都略胜一筹。

2、线性回归+k折交叉验证法与lasso+k折交叉验证法

(1)k=2时

# Boston house prices
# k-fold cross-validation (k=2): compare LinearRegression and Lasso.
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
# The metric functions below are used in the loop; without these imports the
# script raises NameError when run on its own.
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import Lasso

# --- Load the data set ---
# NOTE(review): load_boston appears to be deprecated/removed in recent
# scikit-learn releases -- confirm against the installed version.
house_price = datasets.load_boston()
house_price_feature = house_price.data
house_price_target = house_price.target

# --- Create the splitter and the models ---
# Single source of truth for k: used both to build the splitter and to
# average the per-fold metrics at the end.
N_SPLITS = 2
# NOTE(review): KFold defaults to shuffle=False, so each fold is a contiguous
# range of rows. If the rows are ordered this can distort the scores
# (possibly the reason R^2 comes out negative) -- consider shuffle=True with a
# fixed random_state.
kf = KFold(n_splits=N_SPLITS)
LR_model = LinearRegression()  # ordinary least-squares regressor
LA_model = Lasso()  # L1-regularised regressor, default alpha

# --- Metric accumulators (summed over folds, averaged after the loop) ---
MSE_LR = 0.0
MAE_LR = 0.0
RS_LR = 0.0

MSE_LA = 0.0
MAE_LA = 0.0
RS_LA = 0.0

# --- k-fold loop ---
i = 1
for train_index, test_index in kf.split(house_price_feature):
    # Row indices produced by the splitter select this fold's train/test data.
    house_price_feature_train, house_price_feature_test = house_price_feature[train_index], house_price_feature[test_index]
    house_price_target_train, house_price_target_test = house_price_target[train_index], house_price_target[test_index]

    # Train on the training folds and predict the held-out fold.
    LR_model.fit(house_price_feature_train, house_price_target_train)
    predict_results_LR = LR_model.predict(house_price_feature_test)
    LA_model.fit(house_price_feature_train, house_price_target_train)
    predict_results_LA = LA_model.predict(house_price_feature_test)

    # Accumulate this fold's metrics.
    MSE_LR += mean_squared_error(house_price_target_test, predict_results_LR)
    MAE_LR += mean_absolute_error(house_price_target_test, predict_results_LR)
    RS_LR += r2_score(house_price_target_test, predict_results_LR)

    MSE_LA += mean_squared_error(house_price_target_test, predict_results_LA)
    MAE_LA += mean_absolute_error(house_price_target_test, predict_results_LA)
    RS_LA += r2_score(house_price_target_test, predict_results_LA)

    # Plot predicted (red dashed) vs. real (green solid) values for this fold.
    # i is incremented before use, so the saved file names start at index 2.
    i = i + 1
    x = np.linspace(0, predict_results_LA.size, predict_results_LA.size)
    plt.plot(x, predict_results_LR, "r--", alpha=0.5)
    plt.plot(x, house_price_target_test, "g-", alpha=0.5)
    plt.title("LR")
    plt.savefig("d:/image"+str(i)+"1", dpi=500, bbox_inches='tight')
    plt.show()

    plt.plot(x, predict_results_LA, "r--", alpha=0.5)
    plt.plot(x, house_price_target_test, "g-", alpha=0.5)
    plt.title("LA")
    plt.savefig("d:/image"+str(i)+"2", dpi=500, bbox_inches='tight')
    plt.show()

# --- Report the metrics averaged over the folds ---
print("LR's MSE is ", MSE_LR / N_SPLITS)
print("LR's MAE is ", MAE_LR / N_SPLITS)
print("LR's R Squared is ", RS_LR / N_SPLITS)

print("LA's MSE is ", MSE_LA / N_SPLITS)
print("LA's MAE is ", MAE_LA / N_SPLITS)
print("LA's R Squared is ", RS_LA / N_SPLITS)

得到线性回归预测(k折交叉验证法)图与lasso预测(k折交叉验证法)以及各项指标数据(k=2):
此处因为k=2,因此各有两张图。

线性回归预测(k折交叉验证法)图1
线性回归预测(k折交叉验证法)图2
线性回归预测(k折交叉验证法)图

lasso预测(k折交叉验证法)图1
lasso预测(k折交叉验证法)图2
lasso预测(k折交叉验证法)图

交叉验证法指标数据
指标数据图2

由于R Squared 为负(说明模型在测试折上的拟合效果还不如直接用均值作预测;KFold 默认不打乱样本顺序,k=2 时前后两半数据的分布可能差异较大,这可能是原因之一),便不以R Squared为依据。以MSE和MAE的数据进行对比,lasso预测(k折交叉验证法)更胜一筹。

(2)k=22时

# Boston house prices
# k-fold cross-validation (k=22): compare LinearRegression and Lasso.
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
# The metric functions below are used in the loop; without these imports the
# script raises NameError when run on its own.
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import Lasso

# --- Load the data set ---
# NOTE(review): load_boston appears to be deprecated/removed in recent
# scikit-learn releases -- confirm against the installed version.
house_price = datasets.load_boston()
house_price_feature = house_price.data
house_price_target = house_price.target

# --- Create the splitter and the models ---
# Single source of truth for k: used both to build the splitter and to
# average the per-fold metrics at the end.
N_SPLITS = 22
# NOTE(review): KFold defaults to shuffle=False, so each fold is a contiguous
# range of rows. If the rows are ordered this can distort the scores --
# consider shuffle=True with a fixed random_state.
kf = KFold(n_splits=N_SPLITS)
LR_model = LinearRegression()  # ordinary least-squares regressor
LA_model = Lasso()  # L1-regularised regressor, default alpha

# --- Metric accumulators (summed over folds, averaged after the loop) ---
MSE_LR = 0.0
MAE_LR = 0.0
RS_LR = 0.0

MSE_LA = 0.0
MAE_LA = 0.0
RS_LA = 0.0

# --- k-fold loop ---
i = 1
for train_index, test_index in kf.split(house_price_feature):
    # Row indices produced by the splitter select this fold's train/test data.
    house_price_feature_train, house_price_feature_test = house_price_feature[train_index], house_price_feature[test_index]
    house_price_target_train, house_price_target_test = house_price_target[train_index], house_price_target[test_index]

    # Train on the training folds and predict the held-out fold.
    LR_model.fit(house_price_feature_train, house_price_target_train)
    predict_results_LR = LR_model.predict(house_price_feature_test)
    LA_model.fit(house_price_feature_train, house_price_target_train)
    predict_results_LA = LA_model.predict(house_price_feature_test)

    # Accumulate this fold's metrics.
    MSE_LR += mean_squared_error(house_price_target_test, predict_results_LR)
    MAE_LR += mean_absolute_error(house_price_target_test, predict_results_LR)
    RS_LR += r2_score(house_price_target_test, predict_results_LR)

    MSE_LA += mean_squared_error(house_price_target_test, predict_results_LA)
    MAE_LA += mean_absolute_error(house_price_target_test, predict_results_LA)
    RS_LA += r2_score(house_price_target_test, predict_results_LA)

    # Plot predicted (red dashed) vs. real (green solid) values for this fold.
    # i is incremented before use, so the saved file names start at index 2.
    # Two figures are written per fold.
    i = i + 1
    x = np.linspace(0, predict_results_LA.size, predict_results_LA.size)
    plt.plot(x, predict_results_LR, "r--", alpha=0.5)
    plt.plot(x, house_price_target_test, "g-", alpha=0.5)
    plt.title("LR")
    plt.savefig("d:/image"+str(i)+"1", dpi=500, bbox_inches='tight')
    plt.show()

    plt.plot(x, predict_results_LA, "r--", alpha=0.5)
    plt.plot(x, house_price_target_test, "g-", alpha=0.5)
    plt.title("LA")
    plt.savefig("d:/image"+str(i)+"2", dpi=500, bbox_inches='tight')
    plt.show()

# --- Report the metrics averaged over the folds ---
print("LR's MSE is ", MSE_LR / N_SPLITS)
print("LR's MAE is ", MAE_LR / N_SPLITS)
print("LR's R Squared is ", RS_LR / N_SPLITS)

print("LA's MSE is ", MSE_LA / N_SPLITS)
print("LA's MAE is ", MAE_LA / N_SPLITS)
print("LA's R Squared is ", RS_LA / N_SPLITS)

得到线性回归预测(k折交叉验证法)图与lasso预测(k折交叉验证法)以及各项指标数据(k=22),但因为图太多(k=22,意味着共有22*2张图片),就不放出来了。我们直接通过指标分析:

交叉验证法指标数据2
指标数据图3

通过指标可以看出,线性回归+k折交叉验证法的表现更好。并且对比k=2与k=22的指标数据可知,当k=22时回归效果更好,各项指标明显改善。

综上可以总结:线性回归在留出法和k折交叉验证法(k=22)下都表现得比较好;并且在本实验中,线性回归在留出法下的指标要优于在k折交叉验证法下的指标。

评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值