在对回归问题的建模分析中,经常会遇到模型评估的问题:如何评估回归模型的优劣?本文整理了sklearn的metrics模块中关于回归问题的评估方法。
首先导入相应的函数库并建立模型
# Import the required libraries.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import explained_variance_score  # used in section 5 below
from sklearn.metrics import r2_score                  # used in section 6 below
import numpy as np
# Load the data.
# NOTE: load_boston was removed in scikit-learn 1.2; on newer versions use
# datasets.fetch_california_housing() (or fetch_openml) instead.
bos_house = datasets.load_boston()
bos_house_data = bos_house['data']
bos_house_target = bos_house['target']
# Build and fit the model.
# Fixed random_state values make the train/test split and the forest
# deterministic, so the metric values printed below are reproducible.
x_train, x_test, y_train, y_test = train_test_split(bos_house_data, bos_house_target, random_state=41)
forest_reg = RandomForestRegressor(random_state=41)
forest_reg.fit(x_train, y_train)
y_pred = forest_reg.predict(x_test)
1、mean_squared_error(MSE 常用)
简称MSE,即均方误差,计算公式为:
$MSE=\frac{1}{n}\sum_{i=1}^{n}(y_i-\hat{y_i})^2$
一般使用RMSE进行评估(这个回归分析模型中最常用的评估方法):
$RMSE=\sqrt{\frac{1}{n}\sum_{i=1}^{n}(y_i-\hat{y_i})^2}$
# MSE via sklearn, cross-checked against a direct NumPy computation;
# RMSE is simply the square root of the MSE.
mse = mean_squared_error(y_test, y_pred)
print('MSE为:', mse)
print('MSE为(直接计算):', np.mean((y_test - y_pred) ** 2))
print('RMSE为:', np.sqrt(mse))
# Results:
# MSE为: 20.2051377953
# MSE为(直接计算): 20.2051377953
# RMSE为: 4.49501254673
2、mean_squared_log_error
计算公式为:
$MSLE=\frac{1}{n}\sum_{i=1}^{n}\left(\log(y_i+1)-\log(\hat{y_i}+1)\right)^2$
# MSLE: mean squared error of log(1 + y), computed both ways for verification.
print(mean_squared_log_error(y_test, y_pred))
log_diff = np.log(y_test + 1) - np.log(y_pred + 1)
print(np.mean(log_diff ** 2))
# 0.0387712039735
# 0.0387712039735
3、median_absolute_error
计算公式为:
$median(|y_i-\hat{y_i}|)$
# Median absolute error: robust to outliers compared with mean-based metrics.
print(median_absolute_error(y_test, y_pred))
abs_errors = np.abs(y_test - y_pred)
print(np.median(abs_errors))
# 1.79
# 1.79
4、mean_absolute_error(MAE)
计算公式为:
$\frac{1}{n}\sum_{i=1}^{n}|y_i-\hat{y_i}|$
# MAE: average magnitude of the residuals.
print(mean_absolute_error(y_test, y_pred))
abs_residuals = np.abs(y_test - y_pred)
print(np.mean(abs_residuals))
# 2.7005511811
# 2.7005511811
5、explained_variance_score
解释方差的得分,计算公式为:
$1-\frac{var(y-\hat{y})}{var(y)}$
# Explained variance score: 1 - Var(residuals) / Var(y_test).
print(explained_variance_score(y_test, y_pred))
residual_variance = np.var(y_test - y_pred)
print(1 - residual_variance / np.var(y_test))
# 0.571770465868
# 0.571770465868
6、r2_score
计算公式为:
$R^2(y,\hat{y})=1-\frac{\sum_{i=1}^{n}(y_i-\hat{y_i})^2}{\sum_{i=1}^{n}(y_i-\bar{y})^2}$
其中 $\bar{y}=\frac{1}{n}\sum_{i=1}^{n}y_i$
# R^2 = 1 - SSE / SST, computed via sklearn and directly with NumPy.
print(r2_score(y_test, y_pred))
sse = np.sum((y_test - y_pred) ** 2)
sst = np.sum((y_test - np.mean(y_test)) ** 2)
print(1 - sse / sst)
# 0.566962879097
# 0.566962879097
附:所有代码如下:
# Full script: train a random-forest regressor on the Boston housing data and
# report each regression metric discussed above, computed both via
# sklearn.metrics and directly with NumPy to show the two agree.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import explained_variance_score
from sklearn.metrics import r2_score
import numpy as np
# NOTE(review): load_boston was removed in scikit-learn 1.2 — on newer
# versions replace with datasets.fetch_california_housing() or fetch_openml.
bos_house = datasets.load_boston()
bos_house_data = bos_house['data']
bos_house_target = bos_house['target']
# Fixed random_state values keep the split and the forest reproducible.
x_train,x_test,y_train,y_test = train_test_split(bos_house_data,bos_house_target,random_state=41)
forest_reg = RandomForestRegressor(random_state=41)
forest_reg.fit(x_train,y_train)
y_pred = forest_reg.predict(x_test)
#mean_squared_error (MSE) and its square root (RMSE)
print('MSE为:',mean_squared_error(y_test,y_pred))
print('MSE为(直接计算):',np.mean((y_test-y_pred)**2))
print('RMSE为:',np.sqrt(mean_squared_error(y_test,y_pred)))
#median_absolute_error: median of |y - y_hat|, robust to outliers
print(np.median(np.abs(y_test-y_pred)))
print(median_absolute_error(y_test,y_pred))
#mean_absolute_error: mean of |y - y_hat|
print(np.mean(np.abs(y_test-y_pred)))
print(mean_absolute_error(y_test,y_pred))
#mean_squared_log_error: MSE on log(1 + y)
print(mean_squared_log_error(y_test,y_pred))
print(np.mean((np.log(y_test+1)-np.log(y_pred+1))**2))
#explained_variance_score: 1 - Var(residuals) / Var(y_test)
print(explained_variance_score(y_test,y_pred))
print(1-np.var(y_test-y_pred)/np.var(y_test))
#r2_score: 1 - SSE / SST
print(r2_score(y_test,y_pred))
print(1-(np.sum((y_test-y_pred)**2))/np.sum((y_test -np.mean(y_test))**2))