# ScikitLearn 分类集成算法学习笔记

1、Bagging 算法
①BaggingClassifier

from pandas import read_csv
from sklearn import datasets
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
# Load the iris dataset bundled with scikit-learn (no CSV file is needed).
iris = datasets.load_iris()
# Split the data into input features and target labels.
X = iris.data
Y = iris.target
num_folds = 10
seed = 7
# shuffle=True is required whenever random_state is set: scikit-learn >= 0.24
# raises ValueError for random_state without shuffling.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
cart = DecisionTreeClassifier()
num_tree = 100
# The base learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 (old name removed in 1.4).
model = BaggingClassifier(cart, n_estimators=num_tree, random_state=seed)
# Mean accuracy over the cross-validation folds.
result = cross_val_score(model, X, Y, cv=kfold)
print(result.mean())

②随机森林

from pandas import read_csv
from sklearn import datasets
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
# Load the iris dataset bundled with scikit-learn (no CSV file is needed).
iris = datasets.load_iris()
# Split the data into input features and target labels.
X = iris.data
Y = iris.target
# Cross-validation settings are defined here so the snippet runs on its own.
num_folds = 10
seed = 7
# shuffle=True is required whenever random_state is set: scikit-learn >= 0.24
# raises ValueError for random_state without shuffling.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
num_tree = 100
# Number of features considered at each split; iris has 4 features, so this
# must be at most 4.
max_features = 3
# scikit-learn implements the random forest algorithm as RandomForestClassifier.
model = RandomForestClassifier(n_estimators=num_tree, random_state=seed, max_features=max_features)
result = cross_val_score(model, X, Y, cv=kfold)
print("RandomForestClassifier", result.mean())

③极端随机树

（1）随机森林应用的是Bagging模型，而极端随机树是使用所有的训练样本得到每棵决策树，也就是每棵决策树应用的是相同的全部训练样本。

（2）随机森林是在一个随机特征子集内搜索最优的分叉特征属性和分叉阈值，而极端随机树并不搜索最优阈值，而是为候选特征随机生成分叉阈值（scikit-learn 的实现会再从这些随机分叉中选出最好的一个），从而实现对决策树进行分叉的。

from sklearn.ensemble import ExtraTreesClassifier

# Reuses X, Y, num_tree, seed and kfold from the earlier snippets.
# Number of features considered at each split; iris has 4 features, so this
# must be at most 4.
max_features = 3
model = ExtraTreesClassifier(n_estimators=num_tree, random_state=seed, max_features=max_features)
result = cross_val_score(model, X, Y, cv=kfold)
# Report the mean cross-validated accuracy instead of discarding it.
print("ExtraTreesClassifier", result.mean())

2、Boosting 算法

①AdaBoost

from sklearn.ensemble import AdaBoostClassifier

# Reuses X, Y, num_tree, seed and kfold from the earlier snippets.
model = AdaBoostClassifier(n_estimators=num_tree, random_state=seed)
result = cross_val_score(model, X, Y, cv=kfold)
# Report the mean cross-validated accuracy instead of discarding it.
print("AdaBoostClassifier", result.mean())

②随机梯度提升算法

from sklearn.ensemble import GradientBoostingClassifier

# Reuses X, Y, num_tree, seed and kfold from the earlier snippets.
model = GradientBoostingClassifier(n_estimators=num_tree, random_state=seed)
result = cross_val_score(model, X, Y, cv=kfold)
# Report the mean cross-validated accuracy instead of discarding it.
print("GradientBoostingClassifier", result.mean())

3、投票算法

from sklearn import svm
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression

# VotingClassifier: combine several different classifiers by voting.
# Reuses X, Y and kfold from the earlier snippets; DecisionTreeClassifier is
# imported at the top of the notes.
model_logistic = LogisticRegression()
model_cart = DecisionTreeClassifier()
model_svc = svm.SVC()
# Each entry is a (name, estimator) pair, as VotingClassifier expects.
models = [
    ('logistic', model_logistic),
    ('cart', model_cart),
    ('svm', model_svc),
]
ensemble_model = VotingClassifier(estimators=models)
result = cross_val_score(ensemble_model, X, Y, cv=kfold)
# Report the mean cross-validated accuracy instead of discarding it.
print("VotingClassifier", result.mean())