from sklearn.datasets import make_blobs
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons
from sklearn.metrics import accuracy_score,roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
#from sklearn.cross_validation import StratifiedKFold
'''
Voting即投票机制,分为软投票和硬投票两种,其原理采用少数服从多数的思想。
硬投票:对多个模型直接进行投票,最终投票数最多的类为最终被预测的类。
软投票:和硬投票原理相同,增加了设置权重的功能,可以为不同模型设置不同权重,进而区别模型不同的重要度。
备注:此方法用于解决分类问题。
'''
iris = datasets.load_iris()
x=iris.data
y=iris.target
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3)
clf1 = XGBClassifier(learning_rate=0.1, n_estimators=140, max_depth=1, min_child_weight=2, gamma=0, subsample=0.7,
colsample_bytree=0.6, objective='binary:logistic', nthread=4, scale_pos_weight=1)
clf2 = RandomForestClassifier(n_estimators=50, max_depth=1, min_samples_split=4,
min_samples_leaf=54,oob_score=True)
clf3 = SVC(C=0.1, probability=True)
# 硬投票
eclf = VotingClassifier(estimators=[('xgb', clf1), ('rf', clf2), ('svc', clf3)], voting='hard')
for clf, label in zip([clf1, clf2, clf3, eclf], ['XGBBoosting', 'Random Forest', 'SVM', 'Ensemble']):
scores = cross_val_score(clf, x, y, cv=5, scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))
#软投票
x=iris.data
y=iris.target
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3)
clf1 &#