Python:基于二分类分解的序分类方法

参考文献:[1] Frank E, Hall M. A simple approach to ordinal classification[C]//European Conference on Machine Learning. Springer, Berlin, Heidelberg, 2001: 145-156.

 

该方法的分类精度比直接使用多分类“OVR”(one-vs-rest)策略高约 1–2 个百分点。

import numpy as np
from copy import deepcopy
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,f1_score
from sklearn.model_selection import StratifiedKFold,KFold
from sklearn import datasets
from collections import OrderedDict



def BDOC_train(X,y):
    """Fit the K-1 binary models of the binary-decomposition ordinal classifier.

    For every class label except the largest one, a binary problem
    "is y greater than this label?" is built and a logistic regression is
    fitted on it.

    Args:
        X: Training feature matrix, one row per sample.
        y: Training labels (array-like of orderable values).

    Returns:
        OrderedDict mapping the threshold position ``i`` (0 .. K-2, in
        ascending label order) to the fitted ``LogisticRegression`` for the
        i-th smallest label.
    """
    y = np.asarray(y)
    labels = np.unique(y)  # sorted ascending
    # Drop the largest label by position.  The previous
    # np.delete(labels, max(labels)) used the label *value* as an index,
    # which is only correct when the labels are exactly 0..K-1.
    labels_ = labels[:-1]

    # Build the K-1 binary targets: samples at or below the threshold get the
    # threshold value, everything above gets threshold + 1.  Because
    # predict_proba orders its columns by sorted class value, column 1 of
    # model i is then P(y > labels_[i]).
    target = OrderedDict()
    for i, ele in enumerate(labels_):
        target[i] = np.where(y <= ele, ele, ele + 1)

    # Create and fit one model per threshold.
    modelDict = OrderedDict()
    for i, binary_y in target.items():
        model = LogisticRegression(solver='newton-cg', penalty='l2')
        model.fit(X, binary_y)
        modelDict[i] = model
    return modelDict

def BDOC_prob(modelDict,test_X):
    """Collect ``predict_proba(test_X)`` from every model in ``modelDict``.

    Args:
        modelDict: Mapping of key -> fitted model exposing ``predict_proba``.
        test_X: Samples handed unchanged to each model.

    Returns:
        OrderedDict with the same keys, in the same order, whose values are
        the corresponding ``predict_proba`` results.
    """
    return OrderedDict(
        (key, estimator.predict_proba(test_X))
        for key, estimator in modelDict.items()
    )

def BDOC_pred(probDict,len_test,labelNum,labels):
    """Turn the K-1 binary probability outputs into ordinal class predictions.

    With ``probDict[k][:, 1]`` read as P(y > k-th smallest label), the class
    probabilities are

        P(first class)  = probDict[0][:, 0]
        P(k-th class)   = P(y > label[k-1]) - P(y > label[k])
        P(last class)   = P(y > label[K-2])

    and each sample is assigned the class with the largest value.

    Args:
        probDict: Mapping of threshold position (0 .. labelNum-2) to an
            array of shape (n_samples, 2) as produced by ``predict_proba``.
        len_test: Number of test samples to predict.
        labelNum: Number of ordinal classes K.
        labels: The K class labels.

    Returns:
        ndarray of length ``len_test`` holding the predicted labels.

    Raises:
        ValueError: If fewer than two classes are given.
    """
    if labelNum < 2:
        raise ValueError("ordinal decomposition needs at least two classes")
    # Work with positions in the sorted label list rather than with the
    # label values themselves, so labels need not be exactly 0..K-1.
    labels = np.sort(np.asarray(labels))

    # Column k holds P(y > labels[k]) for every test sample.
    exceed = np.column_stack(
        [np.asarray(probDict[k])[:len_test, 1] for k in range(labelNum - 1)]
    )

    prob = np.empty((len_test, labelNum))
    prob[:, 0] = np.asarray(probDict[0])[:len_test, 0]
    prob[:, 1:-1] = exceed[:, :-1] - exceed[:, 1:]
    prob[:, -1] = exceed[:, -1]

    # argmax yields a position; map it back to the actual label.
    return labels[np.argmax(prob, axis=1)]


if __name__ == '__main__':
    # 10-fold stratified cross-validation of the function-based classifier
    # on the iris data set; reports the mean accuracy over the folds.
    X, y = datasets.load_iris(return_X_y=True)
    labels = np.unique(y)
    labelNum = len(set(y))
    SKF = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
    AccList = []
    for train_idx, test_idx in SKF.split(X, y):
        train_X, test_X = X[train_idx, :], X[test_idx, :]
        train_y, test_y = y[train_idx], y[test_idx]
        len_test = len(test_y)
        # Train the K-1 binary models on this fold, then score the held-out part.
        modelDict = BDOC_train(train_X, train_y)
        probDict = BDOC_prob(modelDict, test_X)
        y_pred = BDOC_pred(probDict, len_test, labelNum, labels)
        AccList.append(accuracy_score(y_true=test_y, y_pred=y_pred))
    print("Mean Acc =",np.mean(AccList))

第二个版本:将上述方法封装成了一个类。

'''
Target: Binary decomposition ordinal classifier
author: DeniuHe
date: 2020-08-05
'''
import numpy as np
from copy import deepcopy
from sklearn import datasets
from scipy.spatial.distance import pdist,squareform
from sklearn.model_selection import train_test_split
from collections import OrderedDict
from itertools import combinations,product
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,f1_score
from itertools import combinations,product
from sklearn.metrics import accuracy_score,f1_score

class BDOC():
    """Ordinal classifier built from K-1 binary "is y above this label?" models.

    For each class label except the largest, a logistic regression is trained
    to separate the samples at or below that label from those above it.  The
    binary probabilities are then combined into per-class scores and the
    highest-scoring class is predicted.

    Attributes set by ``pred``: ``Acc`` (accuracy), ``F1_macro`` (macro F1),
    ``MZE`` (1 - accuracy) and ``MAE`` (mean absolute difference between
    predicted and true labels).
    """

    def __init__(self,X_train,y_train):
        """Store the training data and prepare targets and unfitted models.

        Args:
            X_train: Training feature matrix, one row per sample.
            y_train: Training labels (array-like of orderable values).
        """
        self.X = X_train
        self.y = y_train
        # Take the classes from the labels passed in; the previous code read
        # the module-level variable ``y`` here instead of ``y_train``.
        self.labels = np.unique(y_train)
        # All labels except the largest, dropped by position.  The previous
        # np.delete(labels, max(labels)) used the label value as an index,
        # which is only correct when the labels are exactly 0..K-1.
        self.labels_ = self.labels[:-1]
        self.labelNum = len(self.labels)
        self.targetRecon = self.targetReconstruct()
        self.Acc = 0
        self.MZE = 0
        self.MAE = 0
        self.F1_macro = 0
        self.ocModel = self.initModel()  # OrderedDict: threshold position -> model

    def initModel(self):
        """Return an OrderedDict with one unfitted model per threshold label."""
        modelDict = OrderedDict()
        for i, ele in enumerate(self.labels_):
            modelDict[i] = LogisticRegression(solver='newton-cg', penalty='l2')
            # modelDict[i] = SVC(C=3.0, kernel='rbf', degree=3, gamma='auto',probability= True)
        return modelDict

    def targetReconstruct(self):
        """Rebuild the training labels as K-1 binary target vectors.

        Returns:
            OrderedDict mapping threshold position ``i`` to an ndarray in
            which samples with ``y <= labels_[i]`` carry ``labels_[i]`` and
            all others carry ``labels_[i] + 1``.
        """
        y = np.asarray(self.y)
        targetRecon = OrderedDict()
        for i, ele in enumerate(self.labels_):
            targetRecon[i] = np.where(y <= ele, ele, ele + 1)
        return targetRecon

    def modelTrain(self):
        """Fit every binary model on its reconstructed target vector."""
        for i,model in self.ocModel.items():
            model.fit(self.X,self.targetRecon[i])

    def predict(self,X_test):
        """Predict an ordinal label for every row of ``X_test``.

        Column 1 of model k's ``predict_proba`` output is read as
        P(y > k-th smallest label); class scores are the first model's
        column 0, differences of consecutive column-1 values, and the last
        model's column 1.

        Args:
            X_test: Feature matrix of the samples to classify.

        Returns:
            ndarray with one predicted label per sample.
        """
        probas = [np.asarray(model.predict_proba(X_test))
                  for model in self.ocModel.values()]
        if not probas:
            # Only one class was seen in training, so there is nothing to decide.
            return np.full(len(X_test), self.labels[0])
        exceed = np.column_stack([p[:, 1] for p in probas])
        prob = np.empty((exceed.shape[0], self.labelNum))
        prob[:, 0] = probas[0][:, 0]
        prob[:, 1:-1] = exceed[:, :-1] - exceed[:, 1:]
        prob[:, -1] = exceed[:, -1]
        # argmax yields a position in the sorted label list; map it back to
        # the actual label so labels need not be exactly 0..K-1.
        return self.labels[np.argmax(prob, axis=1)]

    def pred(self,X_test,y_test):
        """Predict ``X_test`` and store the evaluation metrics on the instance.

        Args:
            X_test: Feature matrix of the test samples.
            y_test: True labels of the test samples.

        Sets ``Acc``, ``F1_macro``, ``MZE`` and ``MAE``; returns nothing.
        """
        y_test = np.asarray(y_test)
        y_pred = self.predict(X_test)
        acc = accuracy_score(y_true=y_test,y_pred=y_pred)
        f1 = f1_score(y_true=y_test,y_pred=y_pred,average='macro')
        self.Acc = acc
        self.F1_macro = f1
        self.MZE = 1 - acc
        self.MAE = np.sum(np.abs(y_pred - y_test))/len(y_test)


if __name__ == '__main__':
    # Hold-out evaluation of the class-based classifier on the iris data set.
    X, y = datasets.load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=111
    )
    # Quick look at which classes ended up in each split.
    print(len(set(y_test)))
    print(set(y_train))
    bdoc = BDOC(X_train=X_train, y_train=y_train)
    bdoc.modelTrain()
    bdoc.pred(X_test=X_test, y_test=y_test)
    # Report the metrics stored on the instance by pred().
    for caption, attr in (("Acc=", "Acc"), ("F1_macro=", "F1_macro"),
                          ("MZE=", "MZE"), ("MAE=", "MAE")):
        print(caption, getattr(bdoc, attr))

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

DeniuHe

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值