第7章集成学习

热爱学习的小鲁同学

已于 2022-05-11 18:32:56 修改

阅读量601

点赞数 1

分类专栏： python机器学习笔记文章标签：集成学习 python 机器学习

于 2022-05-11 10:41:51 首次发布

本文链接：https://blog.csdn.net/m0_45055763/article/details/124703711

版权

7.2 多数投票集成学习

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#实现概率质量函数
from scipy.special import comb
import math

def ensemble_error(n_classifier,error):
    """Return the upper-tail binomial probability of a wrong majority.

    Adds up ``C(n,k) * error**k * (1-error)**(n-k)`` for every ``k`` from
    ``ceil(n_classifier/2)`` through ``n_classifier`` inclusive.

    Parameters
    ----------
    n_classifier : int
        Number of base classifiers in the ensemble.
    error : float
        Error rate shared by every base classifier.

    Returns
    -------
    The summed probability mass (``0`` when the range of ``k`` is empty).
    """
    first_k=int(math.ceil(n_classifier/2))
    total=0
    # Accumulate in ascending k, one binomial term per iteration.
    for k in range(first_k,n_classifier+1):
        total=total+comb(n_classifier,k)*(error**k)*(1-error)**(n_classifier-k)
    return total

# Ensemble error for 11 base classifiers that each have an error rate of 0.25.
ensemble_error(n_classifier=11,error=0.25)

0.03432750701904297

# Plot the ensemble error rate against the base-classifier error rate.

# Ensemble error for each base error rate produced by the arange below
# (start 0.0, step 0.01, stop before 1.01), with 11 base classifiers.
error_range=np.arange(0.0,1.01,0.01)
ens_errors=[ensemble_error(n_classifier=11,error=err) for err in error_range]

# Draw both curves; the dashed line plots the base error against itself
# (y = x) so the two can be compared directly.
plt.plot(error_range,ens_errors,label='Ensemble error',linewidth=2)
plt.plot(error_range,error_range,linestyle='--',
       label='Base error',linewidth=2)
plt.xlabel('base error')
plt.ylabel('base/ensemble error')
plt.legend(loc='best')
plt.grid()
plt.show()

在这里插入图片描述

# Weighted vote: total the weights per label in [0,0,1] and pick the label
# whose total is largest.
np.argmax(np.bincount([0,0,1],weights=[0.2,0.2,0.6]))

# The per-label weight totals that the argmax above is taken over.
np.bincount([0,0,1],weights=[0.2,0.2,0.6])

array([0.4, 0.6])

# Voting based on class probabilities.
# NOTE(review): presumably each row is one classifier's class-probability
# output for the same sample (3 classifiers x 2 classes) — confirm.
ex=np.array([[0.9,0.1],
           [0.8,0.2],
           [0.4,0.6]])
ex

array([[0.9, 0.1],
       [0.8, 0.2],
       [0.4, 0.6]])

# Weighted average over axis 0 (the rows of ex), with one weight per row.
p=np.average(ex,axis=0,weights=[0.2,0.2,0.6])
p

array([0.58, 0.42])

#多数投票
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.preprocessing import LabelEncoder
import six
from sklearn.base import clone
from sklearn.pipeline import _name_estimators
import operator

class MajorityVoteClassifier(BaseEstimator,ClassifierMixin):

    """
  多数票集成分类器
  
  parameters
  -------------
  classifiers:array,shape=[n_classifiers]
  
  vote:str{'classlabel','probability'}
  Default:'classlabel'
  if 'classlabel',预测基于类别标签的最大值的索引(argmax)
  if 'probability',概率总和的最大值的索引(argmax)被用来预测类别标签
  
  weights:array,shape=[n_classifiers]
  optional,default:None
  如果提供了‘int’或者‘float’的列表，那么分类器按照重要性被赋予权重。
  如果‘weights=None’,则权重均匀
  
    """
    
    def __init__(self,classifiers,vote='classlabel',weights=None):
        """Store the ensemble configuration without fitting anything.

        Parameters
        ----------
        classifiers : sequence of estimators
            Base classifiers; kept as given and also indexed by name.
        vote : str, default 'classlabel'
            Voting mode; stored unchanged.
        weights : sequence or None, default None
            Per-classifier weights; stored unchanged.
        """
        self.classifiers=classifiers
        # Name -> estimator mapping built from the (name, estimator) pairs
        # returned by _name_estimators.
        self.named_classifiers=dict(_name_estimators(classifiers))
        self.vote=vote
        self.weights=weights
        
    def fit(self,X,y):
        """Fit a fresh clone of every base classifier on encoded labels.

        Parameters
        ----------
        X : array, shape=[n_samples,n_features]
            Training samples.
        y : array, shape=[n_samples]
            Class labels.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If ``vote`` is neither 'probability' nor 'classlabel', or if
            ``weights`` is given and its length differs from the number of
            classifiers.
        """
        # Fail early: predict() treats any vote other than 'probability' as
        # class-label voting, so a misspelled mode would otherwise go unnoticed.
        if self.vote not in ('probability','classlabel'):
            raise ValueError("vote must be 'probability' or 'classlabel'"
                             "; got (vote=%r)"%(self.vote,))

        # A weights/classifiers length mismatch would otherwise only surface
        # later, inside np.bincount / np.average at prediction time.
        if self.weights is not None and len(self.weights)!=len(self.classifiers):
            raise ValueError('Number of classifiers and weights must be equal'
                             '; got %d weights, %d classifiers'
                             %(len(self.weights),len(self.classifiers)))

        # Encode labels as integers starting at 0 so that the np.argmax
        # results in predict() can be mapped back to the original labels.
        self.lablenc_=LabelEncoder()
        self.lablenc_.fit(y)
        self.classes_=self.lablenc_.classes_

        # The encoded target is the same for every classifier: compute it once.
        y_encoded=self.lablenc_.transform(y)

        # clone() gives an unfitted copy with the same parameters, so the
        # estimators passed to __init__ are left untouched.
        self.classifiers_=[clone(clf).fit(X,y_encoded)
                           for clf in self.classifiers]

        return self
    
    def predict(self,X):
        """Predict class labels for X.

        Parameters
        ----------
        X : array, shape=[n_samples,n_features]

        Returns
        -------
        maj_vote : array, shape=[n_samples]
            Predicted class labels, mapped back to the original label values.
        """
        if self.vote!='probability':
            # One row per sample after the transpose; each row holds every
            # fitted classifier's predicted (encoded) label for that sample.
            per_sample_votes=np.asarray(
                [clf.predict(X) for clf in self.classifiers_]).T

            def weighted_winner(row):
                # Encoded label with the largest (weighted) vote count.
                return np.argmax(np.bincount(row,weights=self.weights))

            encoded=np.apply_along_axis(weighted_winner,
                                        axis=1,
                                        arr=per_sample_votes)
        else:
            # Index of the highest averaged class probability per sample.
            encoded=np.argmax(self.predict_proba(X),axis=1)

        # Undo the label encoding to recover the original labels.
        return self.lablenc_.inverse_transform(encoded)
        
        
    def predict_proba(self,X):
        """Return the weighted average of the classifiers' class probabilities.

        Parameters
        ----------
        X : array, shape=[n_samples,n_features]

        Returns
        -------
        avg_proba : array, shape=[n_samples,n_classes]
            Weighted average probability of each class for each sample.
        """
        per_classifier=[]
        for fitted in self.classifiers_:
            per_classifier.append(fitted.predict_proba(X))

        # Axis 0 indexes the classifiers, so averaging over it combines their
        # outputs; self.weights (or None for a plain mean) weights each one.
        return np.average(np.asarray(per_classifier),axis=0,weights=self.weights)
    
    def get_params(self,deep=True):
        
        """
        为了网格搜索获得分类器参数
        """
        
        if not deep:
            return super(MajorityVoteClassifier,self).get_params(deep=False)  #调用父类的方法
        
        else:
            out=self.named_classifiers.copy()
            for name,step in six.iteritems(self.named_classifiers):
                for key,value in six.iteritems(step.get_params(deep=True)):
                    out[

最低0.47元/天解锁文章

热爱学习的小鲁同学

关注

1
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
第7章集成学习

7.2多投票集成学习import numpy as npimport pandas as pdimport matplotlib.pyplot as plt#实现概率质量函数from scipy.special import combimport mathdef ensemble_error(n_classifier,error): k_start=int(math.ceil(n_classifier/2)) probs=[comb(n_classifier,k)*(err
复制链接

扫一扫