先附上代码
import pandas as pd
from sklearn.preprocessing import MinMaxScaler #归一化
from sklearn.model_selection import train_test_split # 拆分数据集
from sklearn.metrics import mean_squared_error # 均方误差
from sklearn.metrics import accuracy_score #精度
from sklearn.neighbors import KNeighborsRegressor # KNN回归器
from sklearn.linear_model import LinearRegression # 线性回归
from sklearn.linear_model import Ridge # 岭回归
from sklearn.linear_model import Lasso # LASSO 回归
# Load the data: every column except the last is treated as a feature,
# the last column is the regression target.
data = pd.read_csv('boston_house_prices.csv')
x, y = data.values[:, :-1], data.values[:, -1]
print("总共有{}个样本,样本特征数为:{}".format(x.shape[0], x.shape[1]))
# Split BEFORE scaling: fitting the scaler on the full data set would let the
# test rows influence the min/max used for preprocessing (data leakage).
# A fixed random_state makes the reported errors reproducible between runs.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)
# The features differ by orders of magnitude (some around 10^-3, others around
# 10^2), so rescale them; the [0, 1] range is learned from the training rows only.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(x_train_raw)
x_train = scaler.transform(x_train_raw)
# Test rows reuse the training min/max, so values may fall slightly outside [0, 1].
x_test = scaler.transform(x_test_raw)
# Scaled copy of the full feature matrix, kept so code that refers to
# `scaler_x` keeps working.
scaler_x = scaler.transform(x)
# K-nearest-neighbours regression with k = 4, fitted on the training split.
knn_train = KNeighborsRegressor(n_neighbors=4).fit(x_train, y_train)
# Predict on both splits so the training error and the test error can be compared.
y_knn_train = knn_train.predict(x_train)
y_knn_test = knn_train.predict(x_test)
train_knn_error = mean_squared_error(y_train, y_knn_train)
test_knn_error = mean_squared_error(y_test, y_knn_test)
print(
    "KNN训练误差,测试误差为:{:.3f},{:.3f}".format(train_knn_error, test_knn_error)
)
# Ordinary least-squares linear regression, fitted on the training split.
line_train = LinearRegression().fit(x_train, y_train)
# Mean squared error on the data the model has seen and on the held-out data.
y_line_train = line_train.predict(x_train)
y_line_test = line_train.predict(x_test)
train_line_error = mean_squared_error(y_train, y_line_train)
test_line_error = mean_squared_error(y_test, y_line_test)
print(
    "线性回归训练误差,测试误差为:{:.3f},{:.3f}".format(train_line_error, test_line_error)
)
# Ridge regression with the estimator's default settings.
ridge_train = Ridge().fit(x_train, y_train)
# Mean squared error on the data the model has seen and on the held-out data.
y_ridge_train = ridge_train.predict(x_train)
y_ridge_test = ridge_train.predict(x_test)
train_ridge_error = mean_squared_error(y_train, y_ridge_train)
test_ridge_error = mean_squared_error(y_test, y_ridge_test)
print(
    "岭回归训练误差,测试误差为:{:.3f},{:.3f}".format(train_ridge_error, test_ridge_error)
)
# Lasso regression with the estimator's default settings.
lasso_train = Lasso().fit(x_train, y_train)
# Mean squared error on the data the model has seen and on the held-out data.
y_lasso_train = lasso_train.predict(x_train)
y_lasso_test = lasso_train.predict(x_test)
train_lasso_error = mean_squared_error(y_train, y_lasso_train)
test_lasso_error = mean_squared_error(y_test, y_lasso_test)
print(
    "lasso回归训练误差,测试误差为:{:.3f},{:.3f}".format(train_lasso_error, test_lasso_error)
)
# Predict prices for the houses listed in a separate CSV file.
# NOTE(review): assumes new_house.csv holds exactly the feature columns used
# for training, in the same order and without a target column - confirm.
new_data = pd.read_csv('new_house.csv')
new_x = new_data.values[:, :]
# The KNN model was trained on min-max scaled features, so new samples must go
# through the same fitted scaler; feeding raw values would compute neighbour
# distances on a completely different scale and give meaningless predictions.
scaler_new_x = scaler.transform(new_x)
new_y_knn = knn_train.predict(scaler_new_x)
print("房价预测为:", new_y_knn)
样本数据如图
![](https://img-blog.csdnimg.cn/direct/a7c9ad84fe014810bec1f515fac84e6e.png)
预测结果如图
![](https://img-blog.csdnimg.cn/direct/c985a7ffad864d30811ee6d922f93117.png)