# Note: the Boston housing dataset has been uploaded as a resource and can be downloaded for free (no points required).
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
# Fit an ordinary least-squares model on the housing data, report R^2 and
# RMSE on a held-out 30% split, and plot actual vs. predicted prices.

# KaiTi is selected so the Chinese legend/axis labels below can be rendered.
plt.rcParams['font.sans-serif'] = ['KaiTi']

# Load the spreadsheet: 'MEDV' is the regression target, every other column
# is used as a feature.
data_path1 = 'housing_true.xls'
hd1 = pd.read_excel(data_path1)
data_ = hd1.drop(['MEDV'], axis=1)
y1 = hd1['MEDV']
# Flat 1-D target array. (np.reshape's third positional argument is `order`,
# not a second dimension, so it must not be given an integer.)
y = np.asarray(y1).reshape(-1)

# Split the data 70/30 into train and test sets; fixed seed for repeatability.
x_train, x_test, y_train, y_test = train_test_split(
    data_, y, test_size=0.3, random_state=1
)

# Train the model and report the coefficient of determination on the test set.
lr = linear_model.LinearRegression()
lr.fit(x_train, y_train)
score_test = lr.score(x_test, y_test)
print(score_test)
# lr.coef_ holds the fitted slopes; plt.plot(lr.coef_) visualises them.

# Predict on the held-out set and compute the residuals.
test_pre = lr.predict(x_test)
dev = y_test - test_pre

# Root mean squared error: square, average, then take the root. Taking the
# root before averaging would yield the mean absolute error instead. The
# mean is taken over the actual number of test samples rather than a
# hard-coded count, so it stays correct if the data or split ratio changes.
RMSE = np.sqrt(np.mean(dev * dev))
print(RMSE)

# Plot actual and predicted prices against the test-sample index.
x_data = range(len(y_test))
plt.figure(figsize=(20, 8), dpi=80)
plt.plot(x_data, y_test, label='实际值')
plt.plot(x_data, test_pre, label='预测值')
plt.grid(alpha=0.3)  # light grid; alpha controls its transparency
plt.ylabel("房价")
plt.legend()
plt.show()

# To save the predictions, uncomment:
# pd.DataFrame({'pre': test_pre}).to_csv('test_pre_housing_true.csv')
# The model used above is sklearn's LinearRegression.