Python:序分类方法比较

import time
import xlwt
import matlab
import matlab.engine
import numpy as np
import pandas as pd
from pathlib import Path
from copy import deepcopy
from collections import OrderedDict
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error, recall_score
from mord import LogisticAT,LogisticIT

# Start a MATLAB engine session when the module is loaded; the NNOP and POM
# methods of ComparClassifier call MATLAB functions through this object.
engine = matlab.engine.start_matlab()


class ComparClassifier():
    """Fit several ordinal-classification baselines on a labeled subset and score them.

    Each public method (``BDadd``, ``BDsub``, ``NNOP``, ``POM``, ``LIT``,
    ``LAT``, ``LR``) trains one approach on ``X_train[labeled]`` /
    ``y_train[labeled]``, predicts ``X_test`` and stores accuracy, macro F1,
    macro recall and mean absolute error in the instance attributes
    initialised in ``__init__``.

    The binary-decomposition methods build one threshold per value of
    ``np.arange(labels[0], labels[-1], 1)``, so the class labels are expected
    to be consecutive numeric ranks.
    """

    def __init__(self, X_train, y_train, labeled, X_test, y_test):
        """Store the labeled training subset and the test split.

        Args:
            X_train: training feature array, indexed by ``labeled``.
            y_train: training label array, indexed by ``labeled``; its unique
                values define the label set.
            labeled: indices (into the training arrays) of the labeled samples.
            X_test: test feature array.
            y_test: test labels used for scoring.
        """
        self.X = X_train[labeled]
        self.y = y_train[labeled]
        self.X_test = X_test
        self.y_test = y_test
        self.labeledNum = len(labeled)
        ##########################
        self.labels = np.unique(y_train)
        # One binary problem "y <= tar" vs "y > tar" per threshold tar.
        self.target = np.arange(self.labels[0], self.labels[-1], 1)
        self.labNum = len(self.labels)
        self.ocModel = self.init_learning_model()
        self.trainIndex = OrderedDict()
        self.trainTarget = OrderedDict()
        ##########################
        # Scores of BDadd (suffix _1) and BDsub (suffix _2).
        self.ACC_1 = self.MAE_1 = self.Recall_1 = self.F1_1 = 0
        self.ACC_2 = self.MAE_2 = self.Recall_2 = self.F1_2 = 0
        # Scores of the remaining baselines, prefixed by method name.
        self.NNOP_ACC = self.NNOP_MAE = self.NNOP_Recall = self.NNOP_F1 = 0
        self.POM_ACC = self.POM_MAE = self.POM_Recall = self.POM_F1 = 0
        self.LIT_ACC = self.LIT_MAE = self.LIT_Recall = self.LIT_F1 = 0
        self.LAT_ACC = self.LAT_MAE = self.LAT_Recall = self.LAT_F1 = 0
        self.LR_ACC = self.LR_MAE = self.LR_Recall = self.LR_F1 = 0

    def init_learning_model(self):
        """Return an ordered dict with one unfitted binary LogisticRegression per threshold."""
        model_dict = OrderedDict()
        for tar in self.target:
            model_dict[tar] = LogisticRegression(solver='newton-cg', penalty='l2')
        return model_dict

    def _fit_threshold_models(self):
        """Fit every threshold model and return its test-set probabilities.

        For threshold ``tar`` the labeled targets are recoded to ``tar`` when
        ``y <= tar`` and to ``tar + 1`` otherwise; the recoded targets are kept
        in ``self.trainTarget``.

        Returns:
            OrderedDict mapping each threshold to the ``predict_proba`` output
            on ``self.X_test``. Column 0 is read as P(y <= tar) and column 1
            as P(y > tar), which relies on scikit-learn ordering the columns
            by sorted class label and on both sides of every threshold being
            present among the labeled samples.
        """
        self.trainTarget = OrderedDict()
        for tar in self.target:
            self.trainTarget[tar] = np.where(self.y <= tar, tar, tar + 1)
        for tar, model in self.ocModel.items():
            model.fit(self.X, self.trainTarget[tar])

        proDict = OrderedDict()
        for tar, model in self.ocModel.items():
            proDict[tar] = model.predict_proba(self.X_test)
        return proDict

    def _evaluate(self, y_pred):
        """Return (accuracy, macro F1, macro recall, MAE) of ``y_pred`` against ``self.y_test``."""
        acc = accuracy_score(y_pred=y_pred, y_true=self.y_test)
        f1 = f1_score(y_true=self.y_test, y_pred=y_pred, average='macro')
        recall = recall_score(y_true=self.y_test, y_pred=y_pred, average='macro')
        mae = mean_absolute_error(y_true=self.y_test, y_pred=y_pred)
        return acc, f1, recall, mae

    def BDadd(self):
        """Binary decomposition decoded by adding the per-threshold probabilities.

        Every label accumulates, over all thresholds, the probability of the
        side of the threshold it lies on; the label with the largest sum is
        predicted. Results go to ``ACC_1``, ``F1_1``, ``Recall_1``, ``MAE_1``.
        """
        proDict = self._fit_threshold_models()
        conf = np.zeros((len(self.y_test), self.labNum))
        for tar, prob in proDict.items():
            for k, lab in enumerate(self.labels):
                if lab <= tar:
                    conf[:, k] += prob[:, 0]
                else:
                    conf[:, k] += prob[:, 1]
        # Dividing each row by its (positive) total would not change the
        # arg-max, so the sums are compared directly. Ties go to the lowest
        # label, as with max() over an ordered dict.
        y_pred = self.labels[np.argmax(conf, axis=1)].astype(float)
        self.ACC_1, self.F1_1, self.Recall_1, self.MAE_1 = self._evaluate(y_pred)

    def BDsub(self):
        """Binary decomposition decoded by differencing the threshold probabilities.

        P(y = k) is taken as P(y > k-1) - P(y > k), with P(y <= first) for the
        first label and P(y > last-1) for the last one; the most probable label
        is predicted. Results go to ``ACC_2``, ``F1_2``, ``Recall_2``, ``MAE_2``.
        """
        proDict = self._fit_threshold_models()
        first, last = self.labels[0], self.labels[-1]
        columns = []
        for ele in self.labels:
            if ele == first:
                columns.append(proDict[ele][:, 0])
            elif ele == last:
                columns.append(proDict[ele - 1][:, 1])
            else:
                columns.append(proDict[ele - 1][:, 1] - proDict[ele][:, 1])
        prob = np.column_stack(columns)
        y_pred = self.labels[np.argmax(prob, axis=1)].astype(float)
        self.ACC_2, self.F1_2, self.Recall_2, self.MAE_2 = self._evaluate(y_pred)

    def _predict_with_matlab(self, matlab_func):
        """Call a MATLAB function with the train/test data and return its predictions as a list.

        Labels are passed as column vectors. The first element of every row of
        the MATLAB result is taken as the prediction.
        """
        X_train_ = matlab.double(self.X.tolist())
        y_train_ = matlab.double(np.vstack(self.y).tolist())
        X_test_ = matlab.double(self.X_test.tolist())
        y_test_ = matlab.double(np.vstack(self.y_test).tolist())
        y_pred = matlab_func(X_train_, y_train_, X_test_, y_test_)
        return [ele[0] for ele in y_pred]

    def NNOP(self):
        """Run the MATLAB function ``NNOPuse`` and store its scores in ``NNOP_*``."""
        print("执行NNOP")
        # NOTE(review): NNOPuse must be resolvable by the MATLAB engine; it is not defined in this file.
        y_pred = self._predict_with_matlab(engine.NNOPuse)
        self.NNOP_ACC, self.NNOP_F1, self.NNOP_Recall, self.NNOP_MAE = self._evaluate(y_pred)

    def POM(self):
        """Run the MATLAB function ``POMuse`` and store its scores in ``POM_*``."""
        print("执行POM")
        # NOTE(review): POMuse must be resolvable by the MATLAB engine; it is not defined in this file.
        y_pred = self._predict_with_matlab(engine.POMuse)
        self.POM_ACC, self.POM_F1, self.POM_Recall, self.POM_MAE = self._evaluate(y_pred)

    def _fit_predict_mord(self, model):
        """Fit a mord model on integer-cast labels and return its test predictions."""
        # The labels are truncated to integers before being handed to mord.
        y = self.y.astype(int)
        model.fit(X=self.X, y=y)
        return model.predict(X=self.X_test)

    def LIT(self):
        """Fit mord's ``LogisticIT`` and store its scores in ``LIT_*``."""
        print("执行LIT")
        model = LogisticIT(alpha=1.0, verbose=0, max_iter=1000)
        y_pred = self._fit_predict_mord(model)
        self.LIT_ACC, self.LIT_F1, self.LIT_Recall, self.LIT_MAE = self._evaluate(y_pred)

    def LAT(self):
        """Fit mord's ``LogisticAT`` and store its scores in ``LAT_*``."""
        print("执行LAT")
        model = LogisticAT(alpha=1.0, verbose=0, max_iter=1000)
        y_pred = self._fit_predict_mord(model)
        self.LAT_ACC, self.LAT_F1, self.LAT_Recall, self.LAT_MAE = self._evaluate(y_pred)

    def LR(self):
        """Fit a plain multi-class ``LogisticRegression`` and store its scores in ``LR_*``."""
        model = LogisticRegression(penalty='l2', solver='newton-cg')
        model.fit(X=self.X, y=self.y)
        y_pred = model.predict(X=self.X_test)
        self.LR_ACC, self.LR_F1, self.LR_Recall, self.LR_MAE = self._evaluate(y_pred)


class Store():
    """Accumulator for one method: a mean list and a std list for each metric.

    Every list starts empty; the driver script appends one value per
    initial-labeled-set size.
    """

    def __init__(self):
        # Accuracy
        self.ACC_mean, self.ACC_std = [], []
        # Macro recall
        self.Recall_mean, self.Recall_std = [], []
        # Macro F1
        self.F1_mean, self.F1_std = [], []
        # Mean absolute error
        self.MAE_mean, self.MAE_std = [], []


if __name__ == '__main__':
    # Experiment driver. For every dataset and every number of initially
    # labeled samples per class, run repeated stratified 5-fold
    # cross-validation, train all baselines on a random labeled subset of the
    # training fold, and write the mean / std of each metric to an .xls file.

    # Raw strings: "\O" and "\R" are not valid escape sequences.
    p = Path(r"D:\OCdata")
    save_dir = Path(r"D:\Result_ORCompare")
    # Alternative dataset selections kept from earlier runs:
    # names = ["abalone15-5bin","balance-scale","bank15-5bin","car","computer15-5bin","ERA","ESL","eucalyptus","housing-5bin","LEV","machine-5bin","newthyroid","stock-5bin","SWD","winequality-red"]
    # names = ["ERA","ESL","eucalyptus","housing-5bin","LEV","machine-5bin","newthyroid","stock-5bin","SWD","winequality-red"]

    # names=["car","LEV","ESL","ERA","housing-5bin","machine-5bin","computer15-5bin","stock-5bin","baseball","SWD"]
    # names=["car","ERA","ESL","LEV","housing-5bin","machine-5bin","computer15-5bin","stock-5bin","baseball","QSR2020-5bin","ARWU2020-5bin","ARWU2020-10bin"]
    # names=["melanoma"]
    names=["automobile","car","LEV","SWD"]
    # names = ["QSR2020-5bin","ARWU2020-5bin","ARWU2020-10bin"]

    # Methods written to the result workbook, in row order.
    Methods = ["LIT","LAT","BDadd","BDsub","NNOP","LR"]
    metrics = ("ACC", "Recall", "F1", "MAE")
    # ComparClassifier method name -> template of the attributes holding its
    # scores. The order is the order in which the methods are executed.
    # POM is executed and collected but is not listed in Methods, so it is
    # not written to the workbook.
    score_attrs = OrderedDict([
        ("BDadd", "{}_1"),
        ("BDsub", "{}_2"),
        ("NNOP", "NNOP_{}"),
        ("POM", "POM_{}"),
        ("LIT", "LIT_{}"),
        ("LAT", "LAT_{}"),
        ("LR", "LR_{}"),
    ])

    for name in names:
        path = p.joinpath(name + ".csv")
        print("#####################################################{}".format(path))
        # The last column of the CSV holds the label, the others the features.
        data = np.array(pd.read_csv(path, header=None))
        X = data[:, :-1]
        y = data[:, -1]
        sto = OrderedDict((method, Store()) for method in Methods)
        # Number of labeled samples drawn per class.
        initNumList = np.arange(1,15,3)
        for init in initNumList:
            Rounds = 5
            # scores[method][metric] -> one value per (round, fold).
            scores = OrderedDict(
                (method, {metric: [] for metric in metrics})
                for method in score_attrs
            )
            for _ in range(Rounds):
                SKF = StratifiedKFold(n_splits=5, shuffle=True)
                for train_idx, test_idx in SKF.split(X, y):
                    train_X = X[train_idx]
                    train_y = y[train_idx]
                    test_X = X[test_idx]
                    test_y = y[test_idx]
                    # Draw `init` labeled samples from every class of the
                    # training fold (raises ValueError if a class has fewer).
                    labeled = []
                    for lab in np.unique(train_y):
                        idxlist = np.flatnonzero(train_y == lab)
                        labeled.extend(np.random.choice(idxlist, size=init, replace=False))

                    model = ComparClassifier(X_train=train_X,y_train=train_y,labeled=labeled,X_test=test_X,y_test=test_y)
                    for method, attr in score_attrs.items():
                        getattr(model, method)()
                        for metric in metrics:
                            scores[method][metric].append(getattr(model, attr.format(metric)))

            # Aggregate each reported method from its OWN score lists.
            for method in Methods:
                for metric in metrics:
                    values = scores[method][metric]
                    getattr(sto[method], metric + "_mean").append(np.mean(values))
                    getattr(sto[method], metric + "_std").append(np.std(values))

        # One sheet per statistic; one row per method, one column per init size.
        sheet_names = ["ACC_mean","ACC_std","Recall_mean","Recall_std","F1_mean","F1_std","MAE_mean","MAE_std"]
        # sheet_names = ["ACC", "Recall", "F1", "MAE"]
        workbook = xlwt.Workbook()
        for sn in sheet_names:
            sheet = workbook.add_sheet(sn)
            for i, method in enumerate(Methods):
                sheet.write(i, 0, method)
                # The sheet name is also the name of the Store attribute.
                for j, value in enumerate(getattr(sto[method], sn), start=1):
                    sheet.write(i, j, value)
        # Make sure the output directory exists before saving.
        save_dir.mkdir(parents=True, exist_ok=True)
        save_path = str(save_dir.joinpath(name + "-result.xls"))
        workbook.save(save_path)






            # print("10倍5折交叉验证")
            # print("BDadd精度:{},标准差:{}".format(np.mean(ACC_BDadd_list),np.std(ACC_BDadd_list)))
            # print("BDsub精度:{},标准差:{}".format(np.mean(ACC_BDsub_list), np.std(ACC_BDsub_list)))
            # print("NNOP 精度:{},标准差:{}".format(np.mean(ACC_NNOP_list), np.std(ACC_NNOP_list)))
            # print("POM  精度:{},标准差:{}".format(np.mean(ACC_POM_list), np.std(ACC_POM_list)))
            # print("LIT  精度:{},标准差:{}".format(np.mean(ACC_LIT_list), np.std(ACC_LIT_list)))
            # print("LAT  精度:{},标准差:{}".format(np.mean(ACC_LAT_list), np.std(ACC_LAT_list)))
            # print("LR  精度:{},标准差:{}".format(np.mean(ACC_LR_list), np.std(ACC_LR_list)))
            # print("   ")
            # print("BDadd召回:{},标准差:{}".format(np.mean(Recall_BDadd_list),np.std(Recall_BDadd_list)))
            # print("BDsub召回:{},标准差:{}".format(np.mean(Recall_BDsub_list), np.std(Recall_BDsub_list)))
            # print("NNOP 召回:{},标准差:{}".format(np.mean(Recall_NNOP_list), np.std(Recall_NNOP_list)))
            # print("POM  召回:{},标准差:{}".format(np.mean(Recall_POM_list), np.std(Recall_POM_list)))
            # print("LIT  召回:{},标准差:{}".format(np.mean(Recall_LIT_list), np.std(Recall_LIT_list)))
            # print("LAT  召回:{},标准差:{}".format(np.mean(Recall_LAT_list), np.std(Recall_LAT_list)))
            # print("LR  召回:{},标准差:{}".format(np.mean(Recall_LR_list), np.std(Recall_LR_list)))
            # print("    ")
            # print("BDadd F1:{},标准差:{}".format(np.mean(F1_BDadd_list),np.std(F1_BDadd_list)))
            # print("BDsub F1:{},标准差:{}".format(np.mean(F1_BDsub_list), np.std(F1_BDsub_list)))
            # print("NNOP  F1:{},标准差:{}".format(np.mean(F1_NNOP_list), np.std(F1_NNOP_list)))
            # print("POM   F1:{},标准差:{}".format(np.mean(F1_POM_list), np.std(F1_POM_list)))
            # print("LIT   F1:{},标准差:{}".format(np.mean(F1_LIT_list), np.std(F1_LIT_list)))
            # print("LAT   F1:{},标准差:{}".format(np.mean(F1_LAT_list), np.std(F1_LAT_list)))
            # print("LR   F1:{},标准差:{}".format(np.mean(F1_LR_list), np.std(F1_LR_list)))
            # print("    ")
            # print("BDadd绝对误差:{},标准差:{}".format(np.mean(MAE_BDadd_list),np.std(MAE_BDadd_list)))
            # print("BDsub绝对误差:{},标准差:{}".format(np.mean(MAE_BDsub_list), np.std(MAE_BDsub_list)))
            # print("NNOP 绝对误差:{},标准差:{}".format(np.mean(MAE_NNOP_list), np.std(MAE_NNOP_list)))
            # print("POM  绝对误差:{},标准差:{}".format(np.mean(MAE_POM_list), np.std(MAE_POM_list)))
            # print("LIT  绝对误差:{},标准差:{}".format(np.mean(MAE_LIT_list), np.std(MAE_LIT_list)))
            # print("LAT  绝对误差:{},标准差:{}".format(np.mean(MAE_LAT_list), np.std(MAE_LAT_list)))
            # print("LR  绝对误差:{},标准差:{}".format(np.mean(MAE_LR_list), np.std(MAE_LR_list)))





上述代码比较了在少量标记数据情况下各序分类方法的性能。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

DeniuHe

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值