经过几天粗浅的学习,我利用波士顿房价数据集做了一点小练习,并写下这篇笔记来记录自己的点滴实验心得。新手实验,望有经验人士勿喷;本人抛砖引玉,希望得到宝贵建议。今后如有新的体会,会继续更新笔记。
1、线性回归+留出法与lasso+留出法
#波士顿房价
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
# Hold-out comparison of LinearRegression and Lasso on the Boston housing data:
# split once, fit both regressors, print MSE / MAE / R^2 and plot the
# predictions against the real prices.


def _report_metrics(tag, y_true, y_pred):
    """Print MSE, MAE and R^2 of *y_pred* against *y_true*, prefixed by *tag*."""
    print(tag + "'s MSE is ", mean_squared_error(y_true, y_pred))
    print(tag + "'s MAE is ", mean_absolute_error(y_true, y_pred))
    print(tag + "'s R Squared is ", r2_score(y_true, y_pred))


def _plot_predictions(title, y_true, y_pred):
    """Show predicted (red dashed) and real (green) targets per test sample."""
    # Integer sample index on the x axis. np.linspace(0, n, n) would space the
    # n points n/(n-1) apart, so x would not line up with the sample number.
    x = np.arange(y_pred.size)
    plt.plot(x, y_pred, "r--", label="predict", alpha=0.5)
    plt.plot(x, y_true, "g-", label="real", alpha=0.5)
    plt.legend(loc="best")
    plt.title(title)
    # To keep a copy on disk, call plt.savefig(...) here, before plt.show().
    plt.show()
    # Start the next plot from a clean figure even on backends where show()
    # returns without closing the window.
    plt.close()


# --- Load the data ---
# NOTE: datasets.load_boston() was deprecated in scikit-learn 1.0 and removed
# in 1.2, so this script needs scikit-learn < 1.2 to run as written.
house_price = datasets.load_boston()
house_price_feature = house_price.data
house_price_target = house_price.target

# --- Hold-out split: 30% of the rows are kept aside for testing ---
# A fixed random_state makes the split, and therefore every number printed
# below, reproducible from one run to the next.
RANDOM_STATE = 0
(
    house_price_feature_train,
    house_price_feature_test,
    house_price_target_train,
    house_price_target_test,
) = train_test_split(
    house_price_feature,
    house_price_target,
    test_size=0.3,
    random_state=RANDOM_STATE,
)

# --- Fit both regressors on the training split ---
LR_model = LinearRegression()
LR_model.fit(house_price_feature_train, house_price_target_train)
LA_model = Lasso()
LA_model.fit(house_price_feature_train, house_price_target_train)

# --- Predict on the held-out split ---
predict_results_LR = LR_model.predict(house_price_feature_test)
predict_results_LA = LA_model.predict(house_price_feature_test)

# --- Metrics ---
_report_metrics("LR", house_price_target_test, predict_results_LR)
_report_metrics("LA", house_price_target_test, predict_results_LA)

# --- Plots ---
_plot_predictions("linear", house_price_target_test, predict_results_LR)
_plot_predictions("lasso", house_price_target_test, predict_results_LA)
得到线性回归预测(留出法)图与lasso预测(留出法)图以及各项指标数据:
线性回归预测(留出法)图
lasso预测(留出法)图
指标数据图1
从各项指标数据可以看出(MSE、MAE 越小越好,R Squared 越接近 1 越好),线性回归+留出法的各项指标都略胜一筹。
2、线性回归+k折交叉验证法与lasso+k折交叉验证法
(1)k=2时
#波士顿房价
#k折交叉验证法
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import Lasso
# K-fold comparison of LinearRegression and Lasso on the Boston housing data:
# for every fold, fit both regressors, accumulate MSE / MAE / R^2, save and show
# a prediction plot per model, then print the metrics averaged over the folds.

# The metric functions used below; imported here so that this section also
# runs on its own (the import list above it does not include them).
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

# Number of folds; also the divisor for the averaged metrics at the end.
N_SPLITS = 2
# Output files are named IMAGE_PREFIX + <fold number> + <"1" for LR, "2" for LA>.
IMAGE_PREFIX = "d:/image"


def _plot_fold(title, y_true, y_pred, image_path):
    """Plot predicted (red dashed) vs. real (green) targets, save, then show."""
    # Integer sample index on the x axis. np.linspace(0, n, n) would space the
    # n points n/(n-1) apart, so x would not line up with the sample number.
    x = np.arange(y_pred.size)
    plt.plot(x, y_pred, "r--", alpha=0.5)
    plt.plot(x, y_true, "g-", alpha=0.5)
    plt.title(title)
    plt.savefig(image_path, dpi=500, bbox_inches="tight")
    plt.show()
    # Close the figure so the next plot does not get drawn on top of this one
    # on backends where show() returns without closing the window.
    plt.close()


# --- Load the data ---
# NOTE: datasets.load_boston() was deprecated in scikit-learn 1.0 and removed
# in 1.2, so this script needs scikit-learn < 1.2 to run as written.
house_price = datasets.load_boston()
house_price_feature = house_price.data
house_price_target = house_price.target

# --- Cross-validation splitter and models ---
# NOTE(review): KFold defaults to shuffle=False, so every fold is a contiguous
# slice of the rows in their stored order. If the rows are not randomly
# ordered, consider KFold(n_splits=N_SPLITS, shuffle=True, random_state=...).
kf = KFold(n_splits=N_SPLITS)
LR_model = LinearRegression()
LA_model = Lasso()

# --- Running sums of the per-fold metrics ---
MSE_LR = 0.0
MAE_LR = 0.0
RS_LR = 0.0
MSE_LA = 0.0
MAE_LA = 0.0
RS_LA = 0.0

# --- K-fold loop; folds are numbered from 1 for the image file names ---
for fold, (train_index, test_index) in enumerate(
    kf.split(house_price_feature), start=1
):
    house_price_feature_train = house_price_feature[train_index]
    house_price_feature_test = house_price_feature[test_index]
    house_price_target_train = house_price_target[train_index]
    house_price_target_test = house_price_target[test_index]

    # Fit on this fold's training rows and predict its test rows.
    LR_model.fit(house_price_feature_train, house_price_target_train)
    predict_results_LR = LR_model.predict(house_price_feature_test)
    LA_model.fit(house_price_feature_train, house_price_target_train)
    predict_results_LA = LA_model.predict(house_price_feature_test)

    # Accumulate this fold's metrics.
    MSE_LR += mean_squared_error(house_price_target_test, predict_results_LR)
    MAE_LR += mean_absolute_error(house_price_target_test, predict_results_LR)
    RS_LR += r2_score(house_price_target_test, predict_results_LR)
    MSE_LA += mean_squared_error(house_price_target_test, predict_results_LA)
    MAE_LA += mean_absolute_error(house_price_target_test, predict_results_LA)
    RS_LA += r2_score(house_price_target_test, predict_results_LA)

    # One figure per model and fold.
    _plot_fold(
        "LR",
        house_price_target_test,
        predict_results_LR,
        IMAGE_PREFIX + str(fold) + "1",
    )
    _plot_fold(
        "LA",
        house_price_target_test,
        predict_results_LA,
        IMAGE_PREFIX + str(fold) + "2",
    )

# --- Metrics averaged over all folds ---
print("LR's MSE is ", MSE_LR / N_SPLITS)
print("LR's MAE is ", MAE_LR / N_SPLITS)
print("LR's R Squared is ", RS_LR / N_SPLITS)
print("LA's MSE is ", MSE_LA / N_SPLITS)
print("LA's MAE is ", MAE_LA / N_SPLITS)
print("LA's R Squared is ", RS_LA / N_SPLITS)
得到线性回归预测(k折交叉验证法)图与lasso预测(k折交叉验证法)以及各项指标数据(k=2):
此处因为k=2,因此各有两张图。
线性回归预测(k折交叉验证法)图
lasso预测(k折交叉验证法)图
指标数据图2
由于 R Squared 为负(说明模型在测试集上的预测效果比直接用测试集均值作预测还差),便不以 R Squared 为依据。这很可能与 KFold 默认不打乱数据(shuffle=False)、每一折都是按原始顺序切出的连续样本有关。以 MSE 和 MAE 的数据进行对比,lasso预测(k折交叉验证法)更胜一筹。
(2)k=22时
#波士顿房价
#k折交叉验证法
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import Lasso
# K-fold comparison of LinearRegression and Lasso on the Boston housing data:
# for every fold, fit both regressors, accumulate MSE / MAE / R^2, save and show
# a prediction plot per model, then print the metrics averaged over the folds.

# The metric functions used below; imported here so that this section also
# runs on its own (the import list above it does not include them).
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

# Number of folds; also the divisor for the averaged metrics at the end.
N_SPLITS = 22
# Output files are named IMAGE_PREFIX + <fold number> + <"1" for LR, "2" for LA>.
IMAGE_PREFIX = "d:/image"


def _plot_fold(title, y_true, y_pred, image_path):
    """Plot predicted (red dashed) vs. real (green) targets, save, then show."""
    # Integer sample index on the x axis. np.linspace(0, n, n) would space the
    # n points n/(n-1) apart, so x would not line up with the sample number.
    x = np.arange(y_pred.size)
    plt.plot(x, y_pred, "r--", alpha=0.5)
    plt.plot(x, y_true, "g-", alpha=0.5)
    plt.title(title)
    plt.savefig(image_path, dpi=500, bbox_inches="tight")
    plt.show()
    # Close the figure so the next plot does not get drawn on top of this one
    # on backends where show() returns without closing the window.
    plt.close()


# --- Load the data ---
# NOTE: datasets.load_boston() was deprecated in scikit-learn 1.0 and removed
# in 1.2, so this script needs scikit-learn < 1.2 to run as written.
house_price = datasets.load_boston()
house_price_feature = house_price.data
house_price_target = house_price.target

# --- Cross-validation splitter and models ---
# NOTE(review): KFold defaults to shuffle=False, so every fold is a contiguous
# slice of the rows in their stored order. If the rows are not randomly
# ordered, consider KFold(n_splits=N_SPLITS, shuffle=True, random_state=...).
kf = KFold(n_splits=N_SPLITS)
LR_model = LinearRegression()
LA_model = Lasso()

# --- Running sums of the per-fold metrics ---
MSE_LR = 0.0
MAE_LR = 0.0
RS_LR = 0.0
MSE_LA = 0.0
MAE_LA = 0.0
RS_LA = 0.0

# --- K-fold loop; folds are numbered from 1 for the image file names ---
for fold, (train_index, test_index) in enumerate(
    kf.split(house_price_feature), start=1
):
    house_price_feature_train = house_price_feature[train_index]
    house_price_feature_test = house_price_feature[test_index]
    house_price_target_train = house_price_target[train_index]
    house_price_target_test = house_price_target[test_index]

    # Fit on this fold's training rows and predict its test rows.
    LR_model.fit(house_price_feature_train, house_price_target_train)
    predict_results_LR = LR_model.predict(house_price_feature_test)
    LA_model.fit(house_price_feature_train, house_price_target_train)
    predict_results_LA = LA_model.predict(house_price_feature_test)

    # Accumulate this fold's metrics.
    MSE_LR += mean_squared_error(house_price_target_test, predict_results_LR)
    MAE_LR += mean_absolute_error(house_price_target_test, predict_results_LR)
    RS_LR += r2_score(house_price_target_test, predict_results_LR)
    MSE_LA += mean_squared_error(house_price_target_test, predict_results_LA)
    MAE_LA += mean_absolute_error(house_price_target_test, predict_results_LA)
    RS_LA += r2_score(house_price_target_test, predict_results_LA)

    # One figure per model and fold (22 folds -> 44 figures).
    _plot_fold(
        "LR",
        house_price_target_test,
        predict_results_LR,
        IMAGE_PREFIX + str(fold) + "1",
    )
    _plot_fold(
        "LA",
        house_price_target_test,
        predict_results_LA,
        IMAGE_PREFIX + str(fold) + "2",
    )

# --- Metrics averaged over all folds ---
print("LR's MSE is ", MSE_LR / N_SPLITS)
print("LR's MAE is ", MAE_LR / N_SPLITS)
print("LR's R Squared is ", RS_LR / N_SPLITS)
print("LA's MSE is ", MSE_LA / N_SPLITS)
print("LA's MAE is ", MAE_LA / N_SPLITS)
print("LA's R Squared is ", RS_LA / N_SPLITS)
得到线性回归预测(k折交叉验证法)图与lasso预测(k折交叉验证法)以及各项指标数据(k=22),但因为图太多(k=22,意味着共有22*2张图片),就不放出来了。我们直接通过指标分析:
指标数据图3
通过指标可看出,线性回归+k折交叉验证法更加好。并且比较k=2与k=22的指标数据,可看出当k=22时,回归得更加好,指标数据明显优化(k=22 时每次用约 21/22 的数据训练,而 k=2 时每次只用一半的数据训练,训练样本更多)。
因此可总结,线性回归在留出法中、k折交叉验证法(k=22)时,都表现得比较好,且在本实验中,线性回归在留出法中,比在k折交叉验证法中,要更好。需要注意的是,留出法和k折交叉验证法只是评估模型的方式,不同评估方式下指标的差异主要来自数据划分的不同,并不代表模型本身发生了变化。