集成学习——Random Forest

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets,cross_validation,ensemble

加载数据集

# Diabetes patients dataset (regression task)
def load_data_regression():
    """Load the scikit-learn diabetes dataset and split it into train/test sets.

    Returns:
        The ``[X_train, X_test, y_train, y_test]`` sequence produced by
        ``train_test_split``, holding out 25% of the samples for testing
        with ``random_state=0`` so the split is reproducible.
    """
    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20; use its
    # replacement and only fall back to the old module on legacy installs.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    diabetes = datasets.load_diabetes()
    return train_test_split(diabetes.data, diabetes.target, test_size=0.25, random_state=0)

# Handwritten digit recognition dataset (Digit Dataset, classification task)
def load_data_classification():
    """Load the scikit-learn digits dataset and split it into train/test sets.

    Returns:
        The ``[X_train, X_test, y_train, y_test]`` sequence produced by
        ``train_test_split``, holding out 25% of the samples for testing
        with ``random_state=0`` so the split is reproducible.
    """
    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20; use its
    # replacement and only fall back to the old module on legacy installs.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    digits = datasets.load_digits()
    return train_test_split(digits.data, digits.target, test_size=0.25, random_state=0)

RandomForestClassifier随机森林分类器

模型原型

class sklearn.ensemble.RandomForestClassifier(n_estimators=10, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, bootstrap=True, oob_score=False, n_jobs=1, random_state=None, verbose=0, warm_start=False, class_weight=None)
参数

  • n_estimators:随机森林中决策树的数量
  • criterion
  • max_depth
  • min_samples_split
  • min_samples_leaf
  • min_weight_fraction_leaf
  • max_features
  • max_leaf_nodes
  • bootstrap
  • oob_score:如果为True,则使用包外样本来计算泛化误差
  • n_jobs
  • random_state
  • verbose
  • warm_start
  • class_weight

属性

  • estimators_:所有训练过的基础分类器
  • classes_
  • n_classes_
  • n_features_
  • n_outputs_
  • feature_importances_
  • oob_score_:训练数据使用包外估计时的得分

方法

  • fit(X,y[,sample_weight])
  • predict(X)
  • predict_log_proba(X)
  • predict_proba(X)
  • score(X,y[,sample_weight])

使用RandomForestClassifier类

def test_RandomForestClassifier(*data):
    """Fit a default RandomForestClassifier and print its train and test scores.

    Args:
        *data: Four positional values, in order:
            X_train, X_test, y_train, y_test.
    """
    train_X, test_X, train_y, test_y = data
    forest = ensemble.RandomForestClassifier()
    forest.fit(train_X, train_y)
    # Report the training split first, then the held-out split.
    for template, features, labels in (
        ('Training Score:%f', train_X, train_y),
        ('Testing Score:%f', test_X, test_y),
    ):
        print(template % forest.score(features, labels))

# Build the classification train/test split once (the names are reused by the
# classifier experiments further down) and print the baseline scores.
X_train,X_test,y_train,y_test=load_data_classification()
test_RandomForestClassifier(X_train,X_test,y_train,y_test)

森林中决策树的个数的影响

def test_RandomForestClassifier_num(*data):
    """Plot classifier train/test scores against the number of trees.

    One RandomForestClassifier is trained for every ``n_estimators`` value in
    1, 3, ..., 99, and both score curves are shown in a single figure.

    Args:
        *data: Four positional values, in order:
            X_train, X_test, y_train, y_test.
    """
    train_X, test_X, train_y, test_y = data
    tree_counts = np.arange(1, 100, step=2)
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)

    # Collect (train score, test score) per forest size, then split the pairs
    # into the two curves that get plotted.
    score_pairs = []
    for tree_count in tree_counts:
        forest = ensemble.RandomForestClassifier(n_estimators=tree_count)
        forest.fit(train_X, train_y)
        score_pairs.append((forest.score(train_X, train_y), forest.score(test_X, test_y)))
    train_curve = [pair[0] for pair in score_pairs]
    test_curve = [pair[1] for pair in score_pairs]

    axes.plot(tree_counts, train_curve, label='Training Score')
    axes.plot(tree_counts, test_curve, label='Testing Score')
    axes.set_xlabel('estimator num')
    axes.set_ylabel('score')
    axes.legend(loc='lower right')
    axes.set_ylim(0, 1.05)
    plt.suptitle('RandomForestClassifier')
    plt.show()

# Plot classifier scores versus the number of trees on the current split.
test_RandomForestClassifier_num(X_train,X_test,y_train,y_test)

max_depth参数的影响

def test_RandomForestClassifier_max_depth(*data):
    """Plot classifier train/test scores against the ``max_depth`` setting.

    One RandomForestClassifier is trained for every depth limit from 1 to 19,
    and both score curves are shown in a single figure.

    Args:
        *data: Four positional values, in order:
            X_train, X_test, y_train, y_test.
    """
    train_X, test_X, train_y, test_y = data
    depth_limits = np.arange(1, 20)
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)

    train_curve, test_curve = [], []
    for depth_limit in depth_limits:
        forest = ensemble.RandomForestClassifier(max_depth=depth_limit)
        forest.fit(train_X, train_y)
        train_curve.append(forest.score(train_X, train_y))
        test_curve.append(forest.score(test_X, test_y))

    # Draw the training curve first so the legend order stays train, test.
    for curve, label in ((train_curve, 'Training Score'), (test_curve, 'Testing Score')):
        axes.plot(depth_limits, curve, label=label)
    axes.set_xlabel('max_depth')
    axes.set_ylabel('score')
    axes.legend(loc='lower right')
    axes.set_ylim(0, 1.05)
    plt.suptitle('RandomForestClassifier')
    plt.show()

# Plot classifier scores versus the max_depth limit on the current split.
test_RandomForestClassifier_max_depth(X_train,X_test,y_train,y_test)

max_features参数的影响

def test_RandomForestClassifier_max_features(*data):
    """Plot classifier train/test scores against the ``max_features`` setting.

    One RandomForestClassifier is trained for every value produced by
    ``np.linspace(0.01, 1.0)``, each passed as a float ``max_features``, and
    both score curves are shown in a single figure.

    Args:
        *data: Four positional values, in order:
            X_train, X_test, y_train, y_test.
    """
    train_X, test_X, train_y, test_y = data
    fractions = np.linspace(0.01, 1.0)
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)

    # Collect (train score, test score) per setting, then split into curves.
    score_pairs = []
    for fraction in fractions:
        forest = ensemble.RandomForestClassifier(max_features=fraction)
        forest.fit(train_X, train_y)
        score_pairs.append((forest.score(train_X, train_y), forest.score(test_X, test_y)))
    train_curve = [pair[0] for pair in score_pairs]
    test_curve = [pair[1] for pair in score_pairs]

    axes.plot(fractions, train_curve, label='Training Score')
    axes.plot(fractions, test_curve, label='Testing Score')
    axes.set_xlabel('max_feature')
    axes.set_ylabel('score')
    axes.legend(loc='lower right')
    axes.set_ylim(0, 1.05)
    plt.suptitle('RandomForestClassifier')
    plt.show()

# Plot classifier scores versus the max_features fraction on the current split.
test_RandomForestClassifier_max_features(X_train,X_test,y_train,y_test)

RandomForestRegressor随机森林回归器

模型原型

class sklearn.ensemble.RandomForestRegressor(n_estimators=10, criterion='mse', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, bootstrap=True, oob_score=False, n_jobs=1, random_state=None, verbose=0, warm_start=False)
参数

  • n_estimators:随机森林中决策树的数量
  • criterion
  • max_depth
  • min_samples_split
  • min_samples_leaf
  • min_weight_fraction_leaf
  • max_features
  • max_leaf_nodes
  • bootstrap
  • oob_score:如果为True,则使用包外样本来计算泛化误差
  • n_jobs
  • random_state
  • verbose
  • warm_start

属性

  • estimators_:所有训练过的基础回归器
  • n_features_
  • n_outputs_
  • feature_importances_
  • oob_score_:训练数据使用包外估计时的得分
  • oob_prediction_

方法

  • fit(X,y[,sample_weight])
  • predict(X)
  • score(X,y[,sample_weight])

使用RandomForestRegressor类

def test_RandomForestRegressor(*data):
    """Fit a default RandomForestRegressor and print its train and test scores.

    Args:
        *data: Four positional values, in order:
            X_train, X_test, y_train, y_test.
    """
    train_X, test_X, train_y, test_y = data
    forest = ensemble.RandomForestRegressor()
    forest.fit(train_X, train_y)
    # Report the training split first, then the held-out split.
    for template, features, targets in (
        ('Training Score:%f', train_X, train_y),
        ('Testing Score:%f', test_X, test_y),
    ):
        print(template % forest.score(features, targets))

# Rebind the shared split names to the regression data (the regressor
# experiments further down reuse them) and print the baseline scores.
X_train,X_test,y_train,y_test=load_data_regression()
test_RandomForestRegressor(X_train,X_test,y_train,y_test)

森林中决策树的个数的影响

def test_RandomForestRegressor_num(*data):
    """Plot regressor train/test scores against the number of trees.

    One RandomForestRegressor is trained for every ``n_estimators`` value in
    1, 3, ..., 99, and both score curves are shown in a single figure whose
    y-axis spans -1 to 1.

    Args:
        *data: Four positional values, in order:
            X_train, X_test, y_train, y_test.
    """
    train_X, test_X, train_y, test_y = data
    tree_counts = np.arange(1, 100, step=2)
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)

    train_curve, test_curve = [], []
    for tree_count in tree_counts:
        forest = ensemble.RandomForestRegressor(n_estimators=tree_count)
        forest.fit(train_X, train_y)
        train_curve.append(forest.score(train_X, train_y))
        test_curve.append(forest.score(test_X, test_y))

    # Draw the training curve first so the legend order stays train, test.
    for curve, label in ((train_curve, 'Training Score'), (test_curve, 'Testing Score')):
        axes.plot(tree_counts, curve, label=label)
    axes.set_xlabel('estimator num')
    axes.set_ylabel('score')
    axes.legend(loc='lower right')
    axes.set_ylim(-1, 1)
    plt.suptitle('RandomForestRegressor')
    plt.show()

# Plot regressor scores versus the number of trees on the current split.
test_RandomForestRegressor_num(X_train,X_test,y_train,y_test)

max_depth参数的影响

def test_RandomForestRegressor_max_depth(*data):
    """Plot regressor train/test scores against the ``max_depth`` setting.

    One RandomForestRegressor is trained for every depth limit from 1 to 19,
    and both score curves are shown in a single figure.

    Args:
        *data: Four positional values, in order:
            X_train, X_test, y_train, y_test.
    """
    train_X, test_X, train_y, test_y = data
    depth_limits = np.arange(1, 20)
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)

    # Collect (train score, test score) per depth, then split into curves.
    score_pairs = []
    for depth_limit in depth_limits:
        forest = ensemble.RandomForestRegressor(max_depth=depth_limit)
        forest.fit(train_X, train_y)
        score_pairs.append((forest.score(train_X, train_y), forest.score(test_X, test_y)))
    train_curve = [pair[0] for pair in score_pairs]
    test_curve = [pair[1] for pair in score_pairs]

    axes.plot(depth_limits, train_curve, label='Training Score')
    axes.plot(depth_limits, test_curve, label='Testing Score')
    axes.set_xlabel('max_depth')
    axes.set_ylabel('score')
    axes.legend(loc='lower right')
    axes.set_ylim(0, 1.05)
    plt.suptitle('RandomForestRegressor')
    plt.show()

# Plot regressor scores versus the max_depth limit on the current split.
test_RandomForestRegressor_max_depth(X_train,X_test,y_train,y_test)

max_features参数的影响

def test_RandomForestRegressor_max_features(*data):
    """Plot regressor train/test scores against the ``max_features`` setting.

    One RandomForestRegressor is trained for every value produced by
    ``np.linspace(0.01, 1.0)``, each passed as a float ``max_features``, and
    both score curves are shown in a single figure.

    Args:
        *data: Four positional values, in order:
            X_train, X_test, y_train, y_test.
    """
    train_X, test_X, train_y, test_y = data
    fractions = np.linspace(0.01, 1.0)
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)

    train_curve, test_curve = [], []
    for fraction in fractions:
        forest = ensemble.RandomForestRegressor(max_features=fraction)
        forest.fit(train_X, train_y)
        train_curve.append(forest.score(train_X, train_y))
        test_curve.append(forest.score(test_X, test_y))

    # Draw the training curve first so the legend order stays train, test.
    for curve, label in ((train_curve, 'Training Score'), (test_curve, 'Testing Score')):
        axes.plot(fractions, curve, label=label)
    axes.set_xlabel('max_feature')
    axes.set_ylabel('score')
    axes.legend(loc='lower right')
    axes.set_ylim(0, 1.05)
    plt.suptitle('RandomForestRegressor')
    plt.show()

# Plot regressor scores versus the max_features fraction on the current split.
test_RandomForestRegressor_max_features(X_train,X_test,y_train,y_test)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值