A Regression Project Example
1. Import and Understand the Data
import numpy as np
from numpy import arange
import matplotlib.pyplot as plt
from pandas import read_csv,set_option
from pandas.plotting import scatter_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error
filename='/home/duan/regression-datasets-housing.csv'
names=['CRIM','ZN','INDUS','CHAS','NOX','RM','AGE','DIS','RAD','TAX','PTRATIO','B','LSTAT','MEDV']
data=read_csv(filename,names=names)
array= data.values
X= array[:,0:13]
Y= array[:,13]
num_folds=10
seed=7
kfold=KFold(n_splits=num_folds, shuffle=True, random_state=seed)
model=LinearRegression()
scoring='neg_mean_squared_error'
result=cross_val_score(model,X,Y,cv=kfold,scoring=scoring)
print('Linear_Regression:%.3f'%result.mean())
Linear_Regression:-34.942
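Note that the 'neg_mean_squared_error' scoring returns the negated MSE so that larger scores are better; -34.942 therefore corresponds to an MSE of about 34.9.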
print(data.shape)
(506, 14)
print(data.dtypes)
CRIM float64
ZN int64
INDUS float64
CHAS int64
NOX float64
RM float64
AGE float64
DIS float64
RAD int64
TAX int64
PTRATIO int64
B float64
LSTAT float64
MEDV float64
dtype: object
# set_option('display.width', 120)  # widen the console display if desired
print(data.head(30))
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO \
0 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15
1 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17
2 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17
3 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18
4 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18
5 0.02985 0 2.18 0 0.458 6.430 58.7 6.0622 3 222 18
6 0.08829 12 7.87 0 0.524 6.012 66.6 5.5605 5 311 15
7 0.14455 12 7.87 0 0.524 6.172 96.1 5.9505 5 311 15
8 0.21124 12 7.87 0 0.524 5.631 100.0 6.0821 5 311 15
9 0.17004 12 7.87 0 0.524 6.004 85.9 6.5921 5 311 15
10 0.22489 12 7.87 0 0.524 6.377 94.3 6.3467 5 311 15
11 0.11747 12 7.87 0 0.524 6.009 82.9 6.2267 5 311 15
12 0.09378 12 7.87 0 0.524 5.889 39.0 5.4509 5 311 15
13 0.62976 0 8.14 0 0.538 5.949 61.8 4.7075 4 307 21
14 0.63796 0 8.14 0 0.538 6.096 84.5 4.4619 4 307 21
15 0.62739 0 8.14 0 0.538 5.834 56.5 4.4986 4 307 21
16 1.05393 0 8.14 0 0.538 5.935 29.3 4.4986 4 307 21
17 0.78420 0 8.14 0 0.538 5.990 81.7 4.2579 4 307 21
18 0.80271 0 8.14 0 0.538 5.456 36.6 3.7965 4 307 21
19 0.72580 0 8.14 0 0.538 5.727 69.5 3.7965 4 307 21
20 1.25179 0 8.14 0 0.538 5.570 98.1 3.7979 4 307 21
21 0.85204 0 8.14 0 0.538 5.965 89.2 4.0123 4 307 21
22 1.23247 0 8.14 0 0.538 6.142 91.7 3.9769 4 307 21
23 0.98843 0 8.14 0 0.538 5.813 100.0 4.0952 4 307 21
24 0.75026 0 8.14 0 0.538 5.924 94.1 4.3996 4 307 21
25 0.84054 0 8.14 0 0.538 5.599 85.7 4.4546 4 307 21
26 0.67191 0 8.14 0 0.538 5.813 90.3 4.6820 4 307 21
27 0.95577 0 8.14 0 0.538 6.047 88.8 4.4534 4 307 21
28 0.77299 0 8.14 0 0.538 6.495 94.4 4.4547 4 307 21
29 1.00245 0 8.14 0 0.538 6.674 87.3 4.2390 4 307 21
B LSTAT MEDV
0 396.90 4.98 24.0
1 396.90 9.14 21.6
2 392.83 4.03 34.7
3 394.63 2.94 33.4
4 396.90 5.33 36.2
5 394.12 5.21 28.7
6 395.60 12.43 22.9
7 396.90 19.15 27.1
8 386.63 29.93 16.5
9 386.71 17.10 18.9
10 392.52 20.45 15.0
11 396.90 13.27 18.9
12 390.50 15.71 21.7
13 396.90 8.26 20.4
14 380.02 10.26 18.2
15 395.62 8.47 19.9
16 386.85 6.58 23.1
17 386.75 14.67 17.5
18 288.99 11.69 20.2
19 390.95 11.28 18.2
20 376.57 21.02 13.6
21 392.53 13.83 19.6
22 396.90 18.72 15.2
23 394.54 19.88 14.5
24 394.33 16.30 15.6
25 303.42 16.51 13.9
26 376.88 14.81 16.6
27 306.38 17.28 14.8
28 387.94 12.80 18.4
29 380.23 11.98 21.0
set_option('display.precision', 2)
print(data.corr(method='pearson'))
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \
CRIM 1.00 -0.20 0.41 -5.59e-02 0.42 -0.22 0.35 -0.38 6.26e-01 0.58
ZN -0.20 1.00 -0.53 -4.25e-02 -0.52 0.31 -0.57 0.66 -3.12e-01 -0.31
INDUS 0.41 -0.53 1.00 6.29e-02 0.76 -0.39 0.64 -0.71 5.95e-01 0.72
CHAS -0.06 -0.04 0.06 1.00e+00 0.09 0.09 0.09 -0.10 -7.37e-03 -0.04
NOX 0.42 -0.52 0.76 9.12e-02 1.00 -0.30 0.73 -0.77 6.11e-01 0.67
RM -0.22 0.31 -0.39 9.13e-02 -0.30 1.00 -0.24 0.21 -2.10e-01 -0.29
AGE 0.35 -0.57 0.64 8.65e-02 0.73 -0.24 1.00 -0.75 4.56e-01 0.51
DIS -0.38 0.66 -0.71 -9.92e-02 -0.77 0.21 -0.75 1.00 -4.95e-01 -0.53
RAD 0.63 -0.31 0.60 -7.37e-03 0.61 -0.21 0.46 -0.49 1.00e+00 0.91
TAX 0.58 -0.31 0.72 -3.56e-02 0.67 -0.29 0.51 -0.53 9.10e-01 1.00
PTRATIO 0.30 -0.38 0.40 -1.30e-01 0.21 -0.35 0.27 -0.24 4.86e-01 0.48
B -0.39 0.18 -0.36 4.88e-02 -0.38 0.13 -0.27 0.29 -4.44e-01 -0.44
LSTAT 0.46 -0.41 0.60 -5.39e-02 0.59 -0.61 0.60 -0.50 4.89e-01 0.54
MEDV -0.39 0.36 -0.48 1.75e-01 -0.43 0.70 -0.38 0.25 -3.82e-01 -0.47
PTRATIO B LSTAT MEDV
CRIM 0.30 -0.39 0.46 -0.39
ZN -0.38 0.18 -0.41 0.36
INDUS 0.40 -0.36 0.60 -0.48
CHAS -0.13 0.05 -0.05 0.18
NOX 0.21 -0.38 0.59 -0.43
RM -0.35 0.13 -0.61 0.70
AGE 0.27 -0.27 0.60 -0.38
DIS -0.24 0.29 -0.50 0.25
RAD 0.49 -0.44 0.49 -0.38
TAX 0.48 -0.44 0.54 -0.47
PTRATIO 1.00 -0.18 0.38 -0.51
B -0.18 1.00 -0.37 0.33
LSTAT 0.38 -0.37 1.00 -0.74
MEDV -0.51 0.33 -0.74 1.00
# Histograms
data.hist(sharex=False,sharey=False,xlabelsize=1,ylabelsize=1)
plt.show()
# Density plots
data.plot(kind='density',subplots=True, layout=(4,4),sharex=False,fontsize=1)
plt.show()
# Box plots
data.plot(kind='box',subplots=True, layout=(4,4),sharex=False,fontsize=1)
plt.show()
# Multivariate plots
# Scatter-matrix plot
scatter_matrix(data)
plt.show()
# Correlation-matrix plot
fig=plt.figure()
ax=fig.add_subplot(111)
cax=ax.matshow(data.corr(),vmin=-1,vmax=1, interpolation='none')
fig.colorbar(cax)
ticks=np.arange(0,14,1)
ax.set_xticks(ticks)
ax.set_yticks(ticks)
ax.set_xticklabels(names)
ax.set_yticklabels(names)
plt.show()
The correlations and distributions show that the structure of this dataset is fairly complex, so it is worth transforming the data to improve model accuracy. A few things to try:
1) Use feature selection to remove most of the highly correlated features (see the sketch after this list);
2) Standardize the data to reduce the impact of the differing measurement scales;
3) Normalize the data to reduce the effect of the differing distributions and improve algorithm accuracy.
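As a minimal sketch of the first idea, assuming the data DataFrame loaded above: compute the absolute correlation matrix and drop one feature from each pair whose correlation exceeds a threshold (the 0.8 cutoff here is an arbitrary illustration, not a value chosen in this project).
# Sketch: drop one feature from each highly correlated pair (0.8 threshold is illustrative)
corr = data.corr(method='pearson').abs()
# keep the upper triangle so each pair is considered only once
upper = corr.where(np.triu(np.ones(corr.shape), k=1).astype(bool))
to_drop = [col for col in upper.columns if (upper[col] > 0.8).any()]
print('Candidates to drop:', to_drop)  # e.g. TAX, which correlates 0.91 with RAD
reduced = data.drop(columns=to_drop)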
2. Evaluate Algorithms
The imports and data loading are identical to Section 1. This section additionally holds out 20% of the data as a validation set:
validation_size=.2
seed= 7
X_train,X_validation,Y_train,Y_validation=\
train_test_split(X,Y,test_size=validation_size,random_state=seed)
# Evaluation settings
num_folds=10
scoring='neg_mean_squared_error'
# Spot-check algorithms
models={}
models['LR']=LinearRegression()
models['Lasso']=Lasso()
models['EN']=ElasticNet()
models['KNN']=KNeighborsRegressor()
models['CART']=DecisionTreeRegressor()
models['SVM']=SVR()
# Evaluate algorithms
results=[]
for key in models:
    kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    cv_result = cross_val_score(models[key], X_train, Y_train, cv=kfold, scoring=scoring)
    results.append(cv_result)
    print('%s: %f (%f)' % (key, cv_result.mean(), cv_result.std()))
The output is:
LR: -21.491485 (9.487715)
Lasso: -26.441306 (11.668888)
EN: -27.573242 (12.345126)
KNN: -41.556566 (13.934857)
CART: -22.023942 (9.936681)
SVM: -85.518088 (31.994782)
From these results, linear regression (LR) has the best MSE, followed by the CART algorithm. Next, look at the distribution of the 10-fold cross-validation results:
# Compare algorithms with box plots of the 10-fold cross-validation results
fig=plt.figure()
fig.suptitle("Algorithm Comparison")
ax=fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(models.keys())
plt.show()
The plot shows that the score distributions of the linear algorithms (LR, Lasso, EN) are similar, and that the KNN results are fairly tightly distributed. The differing measurement scales of the features are probably the main reason the KNN and SVM algorithms perform poorly.
3. Evaluate Algorithms: Standardized Data
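Wrapping the scaler and the model in a Pipeline ensures that, within each cross-validation fold, StandardScaler is fitted on the training portion only, so no information from the held-out fold leaks into the transform.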
pipelines={}
pipelines['ScalerLR']=Pipeline([('Scaler',StandardScaler()),('LR',LinearRegression())])
pipelines['ScalerLasso']=Pipeline([('Scaler',StandardScaler()),('Lasso',Lasso())])
pipelines['ScalerEN']=Pipeline([('Scaler',StandardScaler()),('EN',ElasticNet())])
pipelines['ScalerKNN']=Pipeline([('Scaler',StandardScaler()),('KNN',KNeighborsRegressor())])
pipelines['ScalerCART']=Pipeline([('Scaler',StandardScaler()),('CART',DecisionTreeRegressor())])
pipelines['ScalerSVM']=Pipeline([('Scaler',StandardScaler()),('SVM',SVR())])
# Evaluate the pipelines
results=[]
for key in pipelines:
    kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    cv_result = cross_val_score(pipelines[key], X_train, Y_train, cv=kfold, scoring=scoring)
    results.append(cv_result)
    print('%s: %f (%f)' % (key, cv_result.mean(), cv_result.std()))
Results:
ScalerKNN: -20.223380 (12.320434)
ScalerCART: -24.293780 (11.409124)
ScalerSVM: -29.677837 (17.103985)
ScalerEN: -28.072774 (10.673516)
ScalerLR: -21.491485 (9.487715)
ScalerLasso: -26.687910 (9.035163)
Again, look at the 10-fold cross-validation results:
# Compare algorithms with box plots of the 10-fold cross-validation results
fig=plt.figure()
fig.suptitle("Algorithm Comparison")
ax=fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(pipelines.keys())
plt.show()
The plot shows that KNN now has the best MSE and the tightest distribution.
4. Improve Results with Tuning
KNN performs well on the transformed dataset, but can the result be improved further? The number of neighbors defaults to 5; grid search can be used to find the optimal value of this parameter.
The imports, data loading, and train/validation split are identical to Section 2.
# Tune KNN on the standardized training data
num_folds=10
scoring='neg_mean_squared_error'
scaler=StandardScaler().fit(X_train)
rescaledX=scaler.transform(X_train)
param_grid={'n_neighbors':[1,3,5,7,9,11,13,15,17,19,21]}
model=KNeighborsRegressor()
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
grid=GridSearchCV(estimator=model,param_grid=param_grid, scoring=scoring,cv=kfold)
grid_result = grid.fit(X=rescaledX,y=Y_train)
print('Best: %s using %s' % (grid_result.best_score_, grid_result.best_params_))
cv_results=zip(grid_result.cv_results_['mean_test_score'],
grid_result.cv_results_['std_test_score'],
grid_result.cv_results_['params'])
for mean, std, param in cv_results:
    print('%f (%f) with %r' % (mean, std, param))
The output is:
Best: -18.973905390539052 using {'n_neighbors': 3}
-20.854901 (17.993690) with {'n_neighbors': 1}
-18.973905 (12.876029) with {'n_neighbors': 3}
-20.194390 (12.281323) with {'n_neighbors': 5}
-20.515713 (12.402170) with {'n_neighbors': 7}
-20.300643 (11.411014) with {'n_neighbors': 9}
-20.997007 (11.730952) with {'n_neighbors': 11}
-21.311544 (12.354437) with {'n_neighbors': 13}
-21.497361 (11.474079) with {'n_neighbors': 15}
-22.989202 (11.738922) with {'n_neighbors': 17}
-23.913369 (11.546220) with {'n_neighbors': 19}
-24.596780 (11.597516) with {'n_neighbors': 21}
The best result: for KNN, the optimal parameter is k=3.
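As a quick check that is not part of the original output, the tuned model can also be scored on the hold-out validation set; this sketch reuses the scaler and rescaledX fitted above:
# Sketch: score the tuned KNN (k=3) on the hold-out validation set
model = KNeighborsRegressor(n_neighbors=3)
model.fit(rescaledX, Y_train)
predictions = model.predict(scaler.transform(X_validation))
print('KNN validation MSE: %.3f' % mean_squared_error(Y_validation, predictions))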
5. Ensemble Methods
Besides tuning, another way to improve model accuracy is to use ensemble methods.
The imports, data loading, and train/validation split are again identical to Section 2.
# Evaluation settings
num_folds=10
scoring='neg_mean_squared_error'
ensembles={}
ensembles['ScalerAB']=Pipeline([('Scaler',StandardScaler()),('AB',AdaBoostRegressor())])
ensembles['ScalerAB_KNN']=Pipeline([('Scaler',StandardScaler()),('ABKNN',AdaBoostRegressor(KNeighborsRegressor(n_neighbors=3)))])
ensembles['ScalerAB_LR']=Pipeline([('Scaler',StandardScaler()),('ABLR',AdaBoostRegressor(LinearRegression()))])
ensembles['ScalerRFR']=Pipeline([('Scaler',StandardScaler()),('RFR',RandomForestRegressor())])
ensembles['ScalerETR']=Pipeline([('Scaler',StandardScaler()),('ETR',ExtraTreesRegressor())])
ensembles['ScalerGBR']=Pipeline([('Scaler',StandardScaler()),('GBR',GradientBoostingRegressor())])
# Evaluate the ensembles
results=[]
for key in ensembles:
    kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    cv_result = cross_val_score(ensembles[key], X_train, Y_train, cv=kfold, scoring=scoring)
    results.append(cv_result)
    print('%s: %f (%f)' % (key, cv_result.mean(), cv_result.std()))
The output is:
ScalerRFR: -13.706623 (6.606411)
ScalerAB_KNN: -16.619421 (11.282793)
ScalerGBR: -10.458091 (5.037612)
ScalerAB: -15.337095 (6.908927)
ScalerAB_LR: -24.369701 (7.877789)
ScalerETR: -10.601365 (6.612611)
The results can again be compared with a box plot.
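A minimal sketch, mirroring the comparison code used in the earlier sections:
# Compare the ensembles with box plots of the 10-fold cross-validation results
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(ensembles.keys())
plt.show()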
6. Tune Ensemble Methods
All of these ensemble methods have an n_estimators parameter, which is a good candidate for tuning.
The imports, data loading, and train/validation split are the same as in Section 2.
# Evaluation settings
num_folds=10
scoring='neg_mean_squared_error'
scaler=StandardScaler().fit(X_train)
rescaledX=scaler.transform(X_train)
param_grid={'n_estimators':[10,20,100,200,300,400,500,600,700,800,900]}
model=GradientBoostingRegressor()
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
grid=GridSearchCV(estimator=model,param_grid=param_grid, scoring=scoring,cv=kfold)
grid_result = grid.fit(X=rescaledX,y=Y_train)
print('Best: %s using %s' % (grid_result.best_score_, grid_result.best_params_))
cv_results=zip(grid_result.cv_results_['mean_test_score'],
grid_result.cv_results_['std_test_score'],
grid_result.cv_results_['params'])
for mean, std, param in cv_results:
    print('%f (%f) with %r' % (mean, std, param))
scaler=StandardScaler().fit(X_train)
rescaledX=scaler.transform(X_train)
param_grid={'n_estimators':[10,20,30,40,50,60,70,80,90]}
model=ExtraTreesRegressor()
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
grid=GridSearchCV(estimator=model,param_grid=param_grid, scoring=scoring,cv=kfold)
grid_result = grid.fit(X=rescaledX,y=Y_train)
print('Best: %s using %s' % (grid_result.best_score_, grid_result.best_params_))
cv_results=zip(grid_result.cv_results_['mean_test_score'],
grid_result.cv_results_['std_test_score'],
grid_result.cv_results_['params'])
for mean, std, param in cv_results:
    print('%f (%f) with %r' % (mean, std, param))
The output is as follows. For GradientBoostingRegressor:
Best: -9.739200997481898 using {'n_estimators': 300}
-24.759323 (8.061990) with {'n_estimators': 10}
-14.127978 (5.405834) with {'n_estimators': 20}
-10.628260 (5.030331) with {'n_estimators': 100}
-10.048624 (4.887485) with {'n_estimators': 200}
-9.739201 (4.702751) with {'n_estimators': 300}
-9.742912 (4.665102) with {'n_estimators': 400}
-9.864004 (4.727039) with {'n_estimators': 500}
-9.859123 (4.728447) with {'n_estimators': 600}
-10.035033 (4.793662) with {'n_estimators': 700}
-10.025577 (4.858035) with {'n_estimators': 800}
-9.863188 (4.816367) with {'n_estimators': 900}
And for ExtraTreesRegressor:
Best: -9.081163870553716 using {'n_estimators': 90}
-10.192213 (5.182143) with {'n_estimators': 10}
-10.587533 (6.203045) with {'n_estimators': 20}
-9.739134 (5.409255) with {'n_estimators': 30}
-9.550155 (5.421122) with {'n_estimators': 40}
-9.767894 (5.833083) with {'n_estimators': 50}
-9.417032 (5.107689) with {'n_estimators': 60}
-9.205675 (5.484864) with {'n_estimators': 70}
-9.615868 (5.450794) with {'n_estimators': 80}
-9.081164 (5.395927) with {'n_estimators': 90}
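Of the two, the extra trees model with n_estimators=90 has the best cross-validation score. As a closing sketch (not part of the original run), the chosen model can be finalized on the full training set and checked against the hold-out validation set; the exact score will vary between runs because the grid searches above did not fix the trees' random_state:
# Sketch: finalize the tuned model and score it on the hold-out validation set
scaler = StandardScaler().fit(X_train)
model = ExtraTreesRegressor(n_estimators=90, random_state=seed)
model.fit(scaler.transform(X_train), Y_train)
predictions = model.predict(scaler.transform(X_validation))
print('Validation MSE: %.3f' % mean_squared_error(Y_validation, predictions))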