import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
数据
# Load the Boston housing dataset through scikit-learn's `datasets` module.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the installed version still provides it.
boston = datasets.load_boston()
只使用房间数量这个特征
# Keep a single feature: column 5 of the data matrix (the room-count feature),
# paired with the target values.
x, y = boston.data[:, 5], boston.target
# First look at the raw data; this plot still contains the points that sit at
# the upper limit of the target.
plt.scatter(x, y)
去掉上限点
# Drop the samples whose target reaches the upper limit (keep only y < 50).
# Boolean-mask indexing returns new arrays; the right-hand side is evaluated
# in full before either name is rebound, so x and y are filtered by the same
# mask computed from the unfiltered y.
x, y = x[y < 50], y[y < 50]
plt.scatter(x, y)
train_test_split
# Split the samples into training and test sets with the project's own
# train_test_split; seed=666 is passed, presumably to make the split
# reproducible (verify against the helper's implementation).
from Simple_linear_Regression.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,seed=666)
# Fit the project's simple linear regression model on the training split.
from Simple_linear_Regression.SimpleLinearRegression import Simple_linear_Regression2
reg = Simple_linear_Regression2()
# Print whatever fit() returns (presumably the fitted estimator — confirm).
print(reg.fit(x_train,y_train))
Simple_linear_Regression2
# Print the fitted attributes a_ and b_.
# NOTE(review): presumably the slope and intercept of the fitted line —
# confirm against the Simple_linear_Regression2 class.
print(reg.a_)
print(reg.b_)
7.8608543562689555
-27.459342806705543
# Plot the training samples and overlay the model's predictions for the same
# inputs in red.
plt.scatter(x_train,y_train)
plt.plot(x_train,reg.predict(x_train),color = 'r')
# Predictions for the held-out test inputs; these are what the metrics score.
y_predict = reg.predict(x_test)
使用自己封装的mse,rmse,mae
# Score the test-set predictions with the project's own metric functions
# (MSE, RMSE and MAE) and print each value.
from Simple_linear_Regression.metrics import mean_squared_error
from Simple_linear_Regression.metrics import root_mean_squared_error
from Simple_linear_Regression.metrics import mean_absolute_error
print(mean_squared_error(y_test,y_predict))
print(root_mean_squared_error(y_test,y_predict))
print(mean_absolute_error(y_test,y_predict))
24.156602134387438
4.914936635846635
3.5430974409463873
封装的代码
def mean_squared_error(y_true, y_predict):
    """Return the mean squared error (MSE) between y_true and y_predict.

    Both arguments are converted with ``np.asarray`` before the arithmetic,
    so NumPy arrays and plain Python sequences of numbers are accepted.

    Args:
        y_true: The observed target values.
        y_predict: The predicted values, same length as ``y_true``.

    Returns:
        The sum of squared differences divided by ``len(y_true)``.

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict), \
        'the size of y_true must be equal to the size of y_predict'
    # Coerce to arrays so element-wise subtraction also works for lists.
    residuals = np.asarray(y_true) - np.asarray(y_predict)
    return np.sum(residuals ** 2) / len(y_true)
def root_mean_squared_error(y_true, y_predict):
    """Return the root mean squared error (RMSE) of the predictions.

    This is the square root of ``mean_squared_error(y_true, y_predict)``,
    which expresses the error in the same units as the target values.
    Input validation is left to ``mean_squared_error``.

    Args:
        y_true: The observed target values.
        y_predict: The predicted values, same length as ``y_true``.

    Returns:
        The square root of the mean squared error.
    """
    return np.sqrt(mean_squared_error(y_true, y_predict))
def mean_absolute_error(y_true, y_predict):
    """Return the mean absolute error (MAE) between y_true and y_predict.

    Both arguments are converted with ``np.asarray`` before the arithmetic,
    so NumPy arrays and plain Python sequences of numbers are accepted.

    Args:
        y_true: The observed target values.
        y_predict: The predicted values, same length as ``y_true``.

    Returns:
        The sum of absolute differences divided by ``len(y_predict)``.

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict), \
        'the size of y_true must be equal to the size of y_predict'
    # Coerce to arrays so element-wise subtraction also works for lists.
    residuals = np.asarray(y_true) - np.asarray(y_predict)
    return np.sum(np.absolute(residuals)) / len(y_predict)
分类问题
回归问题如何评价呢
均方误差MSE(Mean Squared Error)
改变量纲
RMSE(Root Mean Squared error)
平均绝对误差MAE(虽然不可导,但是评价一个算法是可以的)
尽量让RMSE小。