import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.special import comb
from sklearn.base import BaseEstimator

# Implement the probability mass function
def ensemble_error(n_classifier, error):
    """Return the upper-tail binomial probability of a wrong majority vote.

    Sums the binomial probability mass ``C(n, k) * error**k *
    (1 - error)**(n - k)`` for every ``k`` from ``ceil(n_classifier / 2)``
    up to and including ``n_classifier``.

    Parameters
    ----------
    n_classifier : int
        Number of trials (base classifiers) in the binomial model.
    error : float
        Probability that a single trial (base classifier) is wrong.

    Returns
    -------
    float
        The summed probability mass.
    """
    # Divide by a float so the ceiling is correct even under Python 2
    # integer-division semantics.
    k_start = int(math.ceil(n_classifier / 2.0))
    probs = [
        comb(n_classifier, k) * error ** k * (1 - error) ** (n_classifier - k)
        for k in range(k_start, n_classifier + 1)
    ]
    return sum(probs)
# Voting based on class probabilities: each row of `ex` is one classifier's
# predicted probabilities for the two classes.
ex = np.array([[0.9, 0.1], [0.8, 0.2], [0.4, 0.6]])
# ex ->
# array([[0.9, 0.1],
#        [0.8, 0.2],
#        [0.4, 0.6]])

# Average over the rows (axis=0), weighting the rows 0.2, 0.2 and 0.6.
p = np.average(ex, axis=0, weights=[0.2, 0.2, 0.6])
# p ->
# array([0.58, 0.42])
# Majority voting
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.preprocessing import LabelEncoder
import six
from sklearn.base import clone
from sklearn.pipeline import _name_estimators
import operator
class MajorityVoteClassifier(BaseEstimator, ClassifierMixin):
    """Majority-vote ensemble classifier.

    Parameters
    ----------
    classifiers : array-like, shape = [n_classifiers]
        The classifiers that make up the ensemble.
    vote : str, {'classlabel', 'probability'}
        Default: 'classlabel'.
        If 'classlabel', the prediction is the index of the largest
        (weighted) count of predicted class labels.
        If 'probability', the prediction is the index of the largest
        (weighted) average of predicted class probabilities.
    weights : array-like, shape = [n_classifiers]
        Optional, default: None.
        If a list of ints or floats is given, the classifiers are weighted
        by importance; if None, all classifiers get equal weight.
    """

    def __init__(self, classifiers, vote='classlabel', weights=None):
        self.classifiers = classifiers
        # _name_estimators yields (name, estimator) pairs; index them by
        # name so parameters can later be looked up per classifier.
        self.named_classifiers = {
            key: value for key, value in _name_estimators(classifiers)
        }
        self.vote = vote
        self.weights = weights
def fit(self, X, y):
    """Fit a fresh clone of every classifier on label-encoded targets.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training samples.
    y : array-like, shape = [n_samples]
        Class labels.

    Returns
    -------
    self : object
    """
    # Encode the labels so that the indices produced by np.argmax in
    # predict() can be mapped back with inverse_transform.
    self.lablenc_ = LabelEncoder()
    self.lablenc_.fit(y)
    self.classes_ = self.lablenc_.classes_

    # The encoded targets are the same for every classifier: compute once.
    encoded_y = self.lablenc_.transform(y)

    self.classifiers_ = []
    for clf in self.classifiers:
        # clone() builds a new estimator with the same parameters, so the
        # classifiers passed to the constructor are left untouched.
        fitted_clf = clone(clf).fit(X, encoded_y)
        self.classifiers_.append(fitted_clf)
    return self
def predict(self, X):
    """Predict class labels for X by (weighted) majority vote.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Samples to classify.

    Returns
    -------
    maj_vote : array-like, shape = [n_samples]
        Predicted class labels, decoded back to the original label values.
    """
    if self.vote == 'probability':
        # Index of the largest averaged class probability for each sample.
        maj_vote = np.argmax(self.predict_proba(X), axis=1)
    else:
        # After the transpose, each row holds every classifier's predicted
        # (encoded) label for one sample.
        predictions = np.asarray(
            [clf.predict(X) for clf in self.classifiers_]
        ).T
        # Per row: weighted count of each encoded label, then the label
        # with the largest count.
        maj_vote = np.apply_along_axis(
            lambda x: np.argmax(np.bincount(x, weights=self.weights)),
            axis=1,
            arr=predictions,
        )
    # Undo the label encoding to recover the original labels.
    maj_vote = self.lablenc_.inverse_transform(maj_vote)
    return maj_vote
def predict_proba(self, X):
    """Return the (weighted) average of the classifiers' class probabilities.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Samples to score.

    Returns
    -------
    avg_proba : array-like, shape = [n_samples, n_classes]
        For each sample, the weighted average probability of each class.
    """
    # Stack one predict_proba result per fitted classifier along axis 0 ...
    probas = np.asarray(
        [clf.predict_proba(X) for clf in self.classifiers_]
    )
    # ... and average across classifiers, using self.weights if given.
    avg_proba = np.average(probas, axis=0, weights=self.weights)
    return avg_proba
defget_params(self,deep=True):"""
为了网格搜索获得分类器参数
"""ifnot deep:returnsuper(MajorityVoteClassifier,self).get_params(deep=False)#调用父类的方法else:
out=self.named_classifiers.copy()for name,step in six.iteritems(self.named_classifiers):for key,value in six.iteritems(step.get_params(deep=True)):
out[