import numpy as np
import matplotlib. pyplot as plt
# %matplotlib inline  # IPython magic — only valid inside a notebook, not in a plain .py file
from sklearn. ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn import tree
# Toy dataset: each row is [monthly spend, hours online]; y holds four targets.
samples = [[800, 3], [1200, 1], [1800, 4], [2500, 2]]
X = np.array(samples)
y = np.array([14, 16, 24, 26])
# For the demo, fit a boosted CLASSIFIER — the four distinct target values
# are treated as four separate class labels.
gbdt = GradientBoostingClassifier(n_estimators=10)
gbdt.fit(X, y)
# Output (repr of the fitted estimator):
# GradientBoostingClassifier(criterion='friedman_mse', init=None,
#     learning_rate=0.1, loss='deviance', max_depth=3,
#     max_features=None, max_leaf_nodes=None,
#     min_impurity_decrease=0.0, min_impurity_split=None,
#     min_samples_leaf=1, min_samples_split=2,
#     min_weight_fraction_leaf=0.0, n_estimators=10,
#     n_iter_no_change=None, presort='auto',
#     random_state=None, subsample=1.0, tol=0.0001,
#     validation_fraction=0.1, verbose=0,
#     warm_start=False)
# Predict on the training data — the model recovers every training label.
gbdt.predict(X)
# Output: array([14, 16, 24, 26])

# Tree of the FIRST boosting stage (class 0): raw leaf values before shrinkage.
# Indexing gbdt[0, 0] goes through BaseEnsemble.__getitem__ into estimators_.
gbdt[0, 0].predict(X)
# Output: array([ 3., -1., -1., -1.])

# Tree of the LAST boosting stage: the per-stage contributions have shrunk,
# showing the residuals decreasing as boosting progresses.
gbdt[-1, 0].predict(X)
# Output: array([ 0.98250675, -0.81422807, -0.81422807, -0.81422807])
# Refit the same data as a REGRESSOR — the targets are really continuous.
gbdt = GradientBoostingRegressor(n_estimators=10)
gbdt.fit(X, y)
gbdt.predict(X)
# Output: array([ 16.09207064, 17.39471376, 22.60528624, 23.90792936])
# (After only 10 stages at learning_rate=0.1 the predictions have not yet
#  fully converged to y = [14, 16, 24, 26].)
# Variance (MSE impurity) of all of y at the root of the first tree:
# mean(y) = 20, so mean((y - 20)^2) = 26.
((y - y.mean()) ** 2).mean()
# Output: 26.0

# Variance within the left split {14, 16}: mean = 15, impurity = 1.
((y[:2] - y[:2].mean()) ** 2).mean()
# Output: 1.0
# 第一棵树:根据平均值计算出残差 [-6, -4, 4, 6]
# (First tree: residuals relative to the mean of y are [-6, -4, 4, 6].)
# Render the first boosting stage's tree; KaiTi font so the Chinese
# feature names display correctly.
plt.rcParams['font.sans-serif'] = 'KaiTi'
plt.figure(figsize=(9, 6))
_ = tree.plot_tree(gbdt[0, 0], feature_names=['消费', '上网'], filled=True)
# Residuals of y = [14, 16, 24, 26] about its mean (20): y - 20 = [-6, -4, 4, 6].
# BUG FIX: the first residual is -6 (14 - 20), not +6 as originally typed —
# the commentary above this cell states the residuals correctly.
gbdt1 = np.array([-6, -4, 4, 6])
# One gradient-boosting step with learning_rate = 0.1 removes 10% of each
# residual, leaving 90% for the next tree to fit.
gbdt1 - gbdt1 * 0.1
# Output: array([-5.4, -3.6,  3.6,  5.4])
# 根据梯度提升,减少残差(残差越小,结果越好)
# (Gradient boosting keeps reducing the residuals — the smaller the
#  residual, the better the fit.)
# Visualize the second and third boosting stages' trees: each successive
# tree fits the (shrinking) residuals left by the previous stages.
for stage in (1, 2):
    plt.rcParams['font.sans-serif'] = 'KaiTi'
    plt.figure(figsize=(9, 6))
    _ = tree.plot_tree(gbdt[stage, 0], feature_names=['消费', '上网'], filled=True)