机器学习Sklearn实战——极限森林、梯度提升树算法

极限森林

from sklearn.ensemble import ExtraTreesClassifier,RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
# Decision tree: each split picks the single threshold with the largest
# information gain.
# Extra Trees ("extremely randomized trees") adds randomness in two places:
#   1. random samples, 2. random split conditions (not the best split):
#      like a random forest it considers a random subset of candidate
#      features, but instead of searching for the most discriminative
#      threshold, a threshold is drawn at random for each candidate feature
#      and the best of those random thresholds becomes the split rule.

# FIX: the boolean positional argument to the dataset loaders was deprecated
# in scikit-learn 0.23 and later removed -- pass return_X_y as a keyword.
X, y = datasets.load_wine(return_X_y=True)

# Baseline: a single decision tree, 6-fold CV accuracy.
clf = DecisionTreeClassifier()
print(cross_val_score(clf, X, y, cv=6, scoring="accuracy").mean())

# Random forest: bootstrap samples + best split within a random feature subset.
forest = RandomForestClassifier(n_estimators=100)
print(cross_val_score(forest, X, y, cv=6, scoring="accuracy").mean())

# Extra trees: additionally randomizes the split threshold itself.
extra = ExtraTreesClassifier(n_estimators=100)
print(cross_val_score(extra, X, y, cv=6, scoring="accuracy").mean())

结果:

0.8653256704980842

0.9777777777777779

0.9833333333333334

梯度提升树的使用

import numpy as np
# FIX: `datasets` was only available because a previous cell imported it;
# import it explicitly so this cell runs standalone.
from sklearn import datasets
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

# Gradient boosting classifier on the iris data set.
# FIX: pass return_X_y as a keyword (positional bool removed in newer sklearn).
X, y = datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
gbdt = GradientBoostingClassifier()
gbdt.fit(X_train, y_train)
print(gbdt.score(X_test, y_test))

结果:

0.9666666666666667
import numpy as np
import matplotlib.pyplot as plt

# Regression as the limit of classification: once the number of classes
# grows large enough, classification effectively becomes regression.
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn import tree


# Features X: time spent online and shopping amount.
# Targets y: age -- 14 (grade 10), 16 (grade 12),
#            24 (fresh graduate), 26 (two years into a job).
X = np.array([[800, 3],
              [1200, 1],
              [1800, 4],
              [2500, 2]])
y = np.array([14, 16, 24, 26])

# Small boosted ensemble: 10 regression trees.
gbdt = GradientBoostingRegressor(n_estimators=10)
gbdt.fit(X, y)

gbdt.predict(X)

结果:

array([16.09207064, 17.39471376, 22.60528624, 23.90792936])
# Use a font with CJK glyphs so the Chinese feature names render correctly.
plt.rcParams.update({"font.sans-serif": "KaiTi"})
fig = plt.figure(figsize=(9, 6))
# Visualize boosting stage 0 (the first tree of the single-output column).
_ = tree.plot_tree(gbdt[0, 0], filled=True, feature_names=["消费", "上网"])

friedman_mse = ((y[:2] - y[:2].mean())**2).mean() = 1 (左叶子样本 y[:2] = [14, 16],均值为 15,平方偏差的均值为 1)

value是14,16,24,26和20的差,即残差,残差越小——>越好——>越准确

# Same CJK-capable font setup as before.
plt.rcParams.update({"font.sans-serif": "KaiTi"})
fig = plt.figure(figsize=(9, 6))
# Visualize boosting stage 1: this tree fits the shrunken residuals.
_ = tree.plot_tree(gbdt[1, 0], filled=True, feature_names=["消费", "上网"])
# Residuals entering the first boosting step (learning_rate = 0.1).
lr = 0.1
gbdt1 = np.array([-6, -4, 6, 4])
# One gradient-boosting update: each residual shrinks by the learning rate.
gbdt1 - gbdt1 * lr

结果:

array([-5.4, -3.6,  5.4,  3.6])

# Residuals after the previous step (learning_rate = 0.1).
step = 0.1
gbdt2 = np.array([-5.4, -3.6, 5.4, 3.6])
# Apply the next boosting update: shrink the residuals again.
gbdt2 - gbdt2 * step

 结果:

array([-4.86, -3.24,  4.86,  3.24])
# CJK-capable font so the feature names display properly.
plt.rcParams.update({"font.sans-serif": "KaiTi"})
fig = plt.figure(figsize=(9, 6))
# Visualize boosting stage 2.
_ = tree.plot_tree(gbdt[2, 0], filled=True, feature_names=["消费", "上网"])

 最后一棵树

# CJK-capable font so the feature names display properly.
plt.rcParams.update({"font.sans-serif": "KaiTi"})
fig = plt.figure(figsize=(9, 6))
# Visualize the final boosting stage (index -1).
_ = tree.plot_tree(gbdt[-1, 0], filled=True, feature_names=["消费", "上网"])

# Residuals left before the final trees (learning_rate = 0.1).
rate = 0.1
gbdt3 = np.array([-2.325, -1.55, 2.325, 1.55])
# Final shown boosting update: shrink the residuals once more.
gbdt3 - gbdt3 * rate

 结果:

array([-2.0925, -1.395, 2.0925, 1.395])

按叶子节点与样本的对应关系把后两个残差调换顺序:array([-2.0925, -1.395, 1.395, 2.0925])

用原始目标 14,16,24,26 分别减去上一行对应位置的残差(下减上),得到预测值:

16.0925,17.395,22.605,23.9075

gbdt.predict(X)

结果:

array([16.09207064, 17.39471376, 22.60528624, 23.90792936])

梯度上升梯度下降

下降——减法求最小值;上升——加法求最大值

import numpy as np
import matplotlib.pyplot as plt

# Objective to minimize: f(x) = (x - 3)^2 + 2.5x - 7.5.
f = lambda x:(x-3)**2 + 2.5*x -7.5
f

# Derivative (= gradient): f'(x) = 2(x - 3) + 2.5.
# FIX: the analytic solution below was pasted as bare statements, which is
# not valid Python -- keep it as a comment.  Setting f'(x) = 0 gives:
#   2(x - 3) + 2.5 = 0  ->  x = 1.75

# Plot the objective over [-2, 5].
x = np.linspace(-2,5,100)
y = f(x)
plt.plot(x,y)

import numpy as np
import matplotlib.pyplot as plt

# Same objective as above: f(x) = (x - 3)^2 + 2.5x - 7.5.
f = lambda x: (x - 3) ** 2 + 2.5 * x - 7.5
f

# Its derivative is the gradient used by the descent loop below.

# Plot the curve over [-2, 5] for reference.
x = np.linspace(-2, 5, 100)
y = f(x)
plt.plot(x, y)

learning_rate = 0.1

# Derivative (gradient) of f: f'(x) = 2(x - 3) + 2.5.
d = lambda x: 2 * (x - 3) + 2.5

# Start from a random integer guess in [-3, 5).
min_value = np.random.randint(-3, 5, size=1)[0]
print("---------------", min_value)

# Keep the previous position; stop once an update moves less than tol.
min_value_last = min_value + 0.1
tol = 0.0001

count = 0
while np.abs(min_value - min_value_last) >= tol:
    # Gradient descent: step AGAINST the gradient direction.
    min_value_last = min_value
    min_value = min_value - learning_rate * d(min_value)
    print("+++++++++++++++++%d" % (count), min_value)
    count += 1
print("****************", min_value)

结果:

----------------- 4
+++++++++++++++++0 3.55
+++++++++++++++++1 3.19
+++++++++++++++++2 2.902
+++++++++++++++++3 2.6716
+++++++++++++++++4 2.48728
+++++++++++++++++5 2.339824
+++++++++++++++++6 2.2218592
+++++++++++++++++7 2.12748736
+++++++++++++++++8 2.051989888
+++++++++++++++++9 1.9915919104
+++++++++++++++++10 1.94327352832
+++++++++++++++++11 1.904618822656
+++++++++++++++++12 1.8736950581248
+++++++++++++++++13 1.84895604649984
+++++++++++++++++14 1.829164837199872
+++++++++++++++++15 1.8133318697598977
+++++++++++++++++16 1.8006654958079182
+++++++++++++++++17 1.7905323966463347
+++++++++++++++++18 1.7824259173170678
+++++++++++++++++19 1.7759407338536541
+++++++++++++++++20 1.7707525870829233
+++++++++++++++++21 1.7666020696663387
+++++++++++++++++22 1.763281655733071
+++++++++++++++++23 1.760625324586457
+++++++++++++++++24 1.7585002596691655
+++++++++++++++++25 1.7568002077353324
+++++++++++++++++26 1.755440166188266
+++++++++++++++++27 1.7543521329506127
+++++++++++++++++28 1.7534817063604902
+++++++++++++++++29 1.7527853650883922
+++++++++++++++++30 1.7522282920707137
+++++++++++++++++31 1.751782633656571
+++++++++++++++++32 1.7514261069252568
+++++++++++++++++33 1.7511408855402055
+++++++++++++++++34 1.7509127084321645
+++++++++++++++++35 1.7507301667457316
+++++++++++++++++36 1.7505841333965853
+++++++++++++++++37 1.7504673067172682
+++++++++++++++++38 1.7503738453738147
***************** 1.7503738453738147
import numpy as np
import matplotlib.pyplot as plt

# Objective to MAXIMIZE: f2(x) = -(x - 3)^2 + 2.5x - 7.5.
f2 = lambda x: -(x - 3) ** 2 + 2.5 * x - 7.5

# Gradient ascent: derivative plus a record of every visited point.
result = []
d2 = lambda x: -2 * (x - 3) + 2.5
learning_rate = 0.1
# Random starting guess; the point of the method is to reach the optimum
# as fast as possible.  Too large a learning rate can make the gradient
# vanish or explode.
max_value = np.random.randint(2, 8, size=1)[0]
# max_value = 1000

result.append(max_value)
print('-------------------', max_value)

# Previous position plus the stopping precision (one ten-thousandth):
# once an update moves less than `precision`, the search ends.
max_value_last = max_value + 0.001
precision = 0.0001
count = 0
while np.abs(max_value - max_value_last) >= precision:
    # Gradient ascent: step ALONG the gradient direction.
    max_value_last = max_value
    max_value = max_value + learning_rate * d2(max_value)
    result.append(max_value)
    count += 1
    print('+++++++++++++++++++++%d' % (count), max_value)
print('**********************', max_value)

# Visualize the objective and every point the ascent visited.
plt.figure(figsize=(12, 9))
x = np.linspace(4, 8, 100)
y = f2(x)
plt.plot(x, y)
result = np.asarray(result)
plt.plot(result, f2(result), '*')

结果:

------------------- 5
+++++++++++++++++++++1 4.85
+++++++++++++++++++++2 4.7299999999999995
+++++++++++++++++++++3 4.6339999999999995
+++++++++++++++++++++4 4.5572
+++++++++++++++++++++5 4.49576
+++++++++++++++++++++6 4.4466079999999994
+++++++++++++++++++++7 4.407286399999999
+++++++++++++++++++++8 4.37582912
+++++++++++++++++++++9 4.350663296
+++++++++++++++++++++10 4.3305306368
+++++++++++++++++++++11 4.31442450944
+++++++++++++++++++++12 4.301539607552
+++++++++++++++++++++13 4.2912316860416
+++++++++++++++++++++14 4.2829853488332805
+++++++++++++++++++++15 4.276388279066625
+++++++++++++++++++++16 4.2711106232533
+++++++++++++++++++++17 4.26688849860264
+++++++++++++++++++++18 4.263510798882112
+++++++++++++++++++++19 4.260808639105689
+++++++++++++++++++++20 4.2586469112845515
+++++++++++++++++++++21 4.256917529027641
+++++++++++++++++++++22 4.255534023222113
+++++++++++++++++++++23 4.254427218577691
+++++++++++++++++++++24 4.2535417748621525
+++++++++++++++++++++25 4.252833419889722
+++++++++++++++++++++26 4.252266735911777
+++++++++++++++++++++27 4.251813388729422
+++++++++++++++++++++28 4.251450710983538
+++++++++++++++++++++29 4.251160568786831
+++++++++++++++++++++30 4.250928455029465
+++++++++++++++++++++31 4.250742764023572
+++++++++++++++++++++32 4.250594211218858
+++++++++++++++++++++33 4.250475368975087
+++++++++++++++++++++34 4.2503802951800695
********************** 4.2503802951800695

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值