RED-SVM

This post presents two ordinal classification models, RED_KELM and REDSVM, which use extended features and the kernel trick to represent and classify samples efficiently. RED_KELM combines a kernel over the original features with the extended (indicator) part and solves a kernel extreme learning machine in closed form; REDSVM hands the same kind of precomputed kernel to a standard SVM, implicitly mapping the linearly inseparable samples into a high-dimensional space where they can be separated. Training-set construction, kernel (Gram) matrix computation, and prediction are covered in the code below.
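The reduction itself is easiest to see on a single sample. With K ordered classes there are K-1 thresholds: the sample is copied K-1 times, each copy gets one row of the (K-1)x(K-1) identity matrix appended to its features, and each copy receives a binary target encoding whether the true class lies above that threshold (RED_KELM marks "above" with +5 and "not above" with -5; REDSVM instead uses +1 for "at or below" and -1 for "above"). Here is a minimal sketch with illustrative values, separate from the actual code further down:

import numpy as np

x = np.array([0.3, 1.2])        # one sample with two features
class_index = 2                 # its class index among K = 4 ordered classes
K = 4

extend_part = np.eye(K - 1)                                 # one indicator row per threshold
eX = np.hstack((np.tile(x, (K - 1, 1)), extend_part))       # three extended samples of length 2 + 3
ey = np.where(np.arange(K - 1) < class_index, 1.0, -1.0)    # +1 where the class lies above threshold k
print(eX)   # rows: [0.3 1.2 1 0 0], [0.3 1.2 0 1 0], [0.3 1.2 0 0 1]
print(ey)   # [ 1.  1. -1.]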
import numpy as np
from collections import OrderedDict
from numpy.linalg import inv
from scipy.special import expit
from sklearn.metrics import accuracy_score, mean_absolute_error
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC


class RED_KELM():
    """Reduction-based kernel extreme learning machine (RED-KELM) for ordinal classification."""
    def __init__(self):
        self.gamma = 0.1   # kept from the original script; not used by the distance-based kernel below
        self.C = 100       # kept from the original script; the ridge term in fit() is hard-coded to 0.1
        self.eX = None     # extended training samples
        self.ey = None     # extended binary targets

    def fit(self, X, y):
        self.X = X
        self.y = y
        self.nSample, self.nDim = X.shape
        self.labels = list(np.sort(np.unique(y)))
        self.nClass = len(self.labels)
        self.nTheta = self.nClass - 1                # number of ordinal thresholds
        self.extend_part = np.eye(self.nClass - 1)   # one indicator row per threshold
        self.label_dict = self.Get_binary_label()
        self.eX, self.ey = self.train_set_construct(X=self.X, y=self.y)
        self.gram_train = self.get_gram_train()
        # Closed-form (K)ELM solution: beta = (lambda * I + K)^(-1) @ targets, with lambda hard-coded to 0.1.
        self.beta = inv(0.1*np.eye(self.gram_train.shape[0]) + self.gram_train) @ self.ey
        return self


    def Get_binary_label(self):
        # For the class with index i, threshold k gets target +5 if the class lies above
        # threshold k (i > k) and -5 otherwise; predict() later counts the positive outputs.
        label_dict = OrderedDict()
        for i, lab in enumerate(self.labels):
            tmp_label = np.ones(self.nClass-1)
            for k in range(self.nClass-1):
                if i <= k:
                    tmp_label[k] = -5
                else:
                    tmp_label[k] = 5
            label_dict[lab] = tmp_label
        return label_dict

    def train_set_construct(self, X, y):
        eX = np.zeros((self.nSample * self.nTheta, self.nDim + self.nTheta))
        ey = np.zeros(self.nSample * self.nTheta)
        for i in range(self.nSample):
            eXi = np.hstack((np.tile(X[i], (self.nTheta, 1)), self.extend_part))
            eX[self.nTheta * i: self.nTheta * i + self.nTheta] = eXi
            ey[self.nTheta * i: self.nTheta * i + self.nTheta] = self.label_dict[y[i]]
        return eX, ey

    def test_set_construct(self, X_test):
        nTest = X_test.shape[0]
        eX = np.zeros((nTest * self.nTheta, self.nDim + self.nTheta))
        for i in range(nTest):
            eXi = np.hstack((np.tile(X_test[i],(self.nTheta,1)), self.extend_part))
            eX[self.nTheta * i: self.nTheta * i + self.nTheta] = eXi
        return eX

    def get_gram_train(self):
        # Kernel between extended samples: negative Euclidean distance on the original features
        # plus the inner product of the indicator (extended) parts.
        # gram_train_1 = -squareform(pdist(X=self.eX[:,:self.nDim],metric='euclidean'))
        gram_train_1 = -pairwise_distances(X=self.eX[:,:self.nDim],Y=self.eX[:,:self.nDim],metric="euclidean")
        gram_train_2 = self.eX[:,self.nDim:] @ self.eX[:,self.nDim:].T
        gram_train = gram_train_1 + gram_train_2
        return gram_train

    def get_gram_test(self, eX_test):
        gram_test_1 = -pairwise_distances(X=eX_test[:,:self.nDim],Y=self.eX[:,:self.nDim],metric="euclidean")
        gram_test_2 = eX_test[:,self.nDim:] @ self.eX[:,self.nDim:].T
        gram_test = gram_test_1 + gram_test_2
        return gram_test

    def predict(self, X):
        nTest = X.shape[0]
        eX_test = self.test_set_construct(X_test=X)
        gram_test = self.get_gram_test(eX_test)
        y_extend = np.sign(gram_test @ self.beta)
        y_tmp = y_extend.reshape(nTest, self.nTheta)
        # A sample's class index is the number of thresholds it crosses (positive outputs);
        # map that index back to the original label values so non-zero-based labels also work.
        y_pred = np.asarray(self.labels)[np.sum(y_tmp > 0, axis=1)]
        return y_pred

    def distant_to_theta(self, X):
        nTest = X.shape[0]
        eX_test = self.test_set_construct(X_test=X)
        gram_test = self.get_gram_test(eX_test)
        dist_tmp = gram_test @ self.beta

        dist_matrix = -dist_tmp.reshape(nTest, self.nTheta)
        return dist_matrix

    def predict_proba(self, X):
        nTest = X.shape[0]
        eX_test = self.test_set_construct(X_test=X)
        gram_test = self.get_gram_test(eX_test)
        dist_tmp = gram_test @ self.beta
        dist_matrix = -dist_tmp.reshape(nTest, self.nTheta) * 10   # scale before the sigmoid
        accumulative_proba = expit(dist_matrix)                    # cumulative P(y <= k) per threshold
        # Pad with P(y <= -1) = 0 and P(y <= K-1) = 1, then difference the cumulative
        # probabilities to obtain one probability per class.
        prob = np.pad(
            accumulative_proba,
            pad_width=((0, 0), (1, 1)),
            mode='constant',
            constant_values=(0, 1))
        prob = np.diff(prob)
        return prob
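

With the class in place, a quick smoke test shows how it is used. The synthetic data, split, and names below are illustrative assumptions rather than part of the original experiments:

# Illustrative usage of RED_KELM on synthetic ordinal data (assumption, not from the original post).
rng = np.random.RandomState(0)
X_demo = rng.randn(120, 2)
y_demo = np.digitize(X_demo[:, 0], bins=[-0.5, 0.5])   # ordinal labels 0/1/2 driven by the first feature

X_tr, X_te, y_tr, y_te = train_test_split(X_demo, y_demo, test_size=0.3, random_state=0, stratify=y_demo)
kelm = RED_KELM().fit(X_tr, y_tr)
y_hat = kelm.predict(X_te)
print("RED_KELM accuracy:", accuracy_score(y_te, y_hat))
print("RED_KELM MAE     :", mean_absolute_error(y_te, y_hat))   # MAE respects the label ordering
print("proba rows sum to 1:", np.allclose(kelm.predict_proba(X_te).sum(axis=1), 1.0))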



class REDSVM():
    """Reduction-based SVM (RED-SVM) for ordinal classification, trained on a precomputed kernel."""
    def __init__(self):
        # self.gamma = 0.1
        self.C = 10
        self.eX = self.ey = None

    def fit(self, X, y):
        self.X = np.asarray(X, dtype=np.float64)
        self.y = np.asarray(y, dtype=np.int32)
        self.nSample, self.nDim = X.shape
        self.labels = list(np.sort(np.unique(y)))
        self.nClass = len(self.labels)
        self.nTheta = self.nClass - 1                # number of ordinal thresholds
        self.extend_part = np.eye(self.nClass - 1)   # one indicator row per threshold
        self.label_dict = self.get_label_dict()
        self.eX, self.ey = self.train_set_construct(X=self.X, y=self.y)
        self.gram_train = self.get_gram_train()
        # The Gram matrix is built by get_gram_train(), so SVC only ever sees precomputed kernels.
        self.model = SVC(kernel='precomputed', C=self.C, probability=True)
        self.model.fit(self.gram_train, y=self.ey)
        return self



    def get_label_dict(self):
        # Threshold k gets target +1 if the label is at or below self.labels[k], else -1
        # (the opposite sign convention to RED_KELM, which is why predict() counts the negatives).
        label_dict = OrderedDict()
        for i, lab in enumerate(self.labels):
            tmp_label = np.ones(self.nTheta)
            for k, pad in enumerate(self.labels[:-1]):
                if lab <= pad:
                    tmp_label[k] = 1
                else:
                    tmp_label[k] = -1
            label_dict[lab] = tmp_label
        return label_dict

    def train_set_construct(self, X, y):
        eX = np.zeros((self.nSample * self.nTheta, self.nDim + self.nTheta))
        ey = np.zeros(self.nSample * self.nTheta)
        for i in range(self.nSample):
            eXi = np.hstack((np.tile(X[i], (self.nTheta, 1)), self.extend_part))
            eX[self.nTheta * i: self.nTheta * i + self.nTheta] = eXi
            ey[self.nTheta * i: self.nTheta * i + self.nTheta] = self.label_dict[y[i]]
        return eX, ey

    def test_set_construct(self, X_test):
        nTest = X_test.shape[0]
        eX = np.zeros((nTest * self.nTheta, self.nDim + self.nTheta))
        for i in range(nTest):
            eXi = np.hstack((np.tile(X_test[i],(self.nTheta,1)), self.extend_part))
            eX[self.nTheta * i: self.nTheta * i + self.nTheta] = eXi
        return eX

    def get_gram_train(self):
        # gram_train_1 = rbf_kernel(X=self.eX[:,:self.nDim],gamma=0.1)
        gram_train_1 = -pairwise_distances(X=self.eX[:,:self.nDim],Y=self.eX[:,:self.nDim],metric="euclidean")
        gram_train_2 = self.eX[:,self.nDim:] @ self.eX[:,self.nDim:].T
        gram_train = gram_train_1 + gram_train_2
        return gram_train

    def get_gram_test(self, eX_test):
        # gram_test_1 = rbf_kernel(X=eX_test[:,:self.nDim], Y=self.eX[:,:self.nDim], gamma=0.1)
        gram_test_1 = -pairwise_distances(X=eX_test[:,:self.nDim],Y=self.eX[:,:self.nDim],metric="euclidean")
        gram_test_2 = eX_test[:,self.nDim:] @ self.eX[:,self.nDim:].T
        gram_test = gram_test_1 + gram_test_2
        return gram_test

    def predict(self, X_test):
        nTest = X_test.shape[0]
        eX_test = self.test_set_construct(X_test=X_test)
        gram_test = self.get_gram_test(eX_test)
        y_extend = self.model.predict(gram_test)
        y_tmp = y_extend.reshape(nTest, self.nTheta)
        # Count the thresholds each sample exceeds (negative outputs, given the sign convention
        # in get_label_dict) and map that class index back to the original label values.
        y_pred = np.asarray(self.labels)[np.sum(y_tmp < 0, axis=1)].astype(np.int32)
        return y_pred

    def predict_proba(self, X_test):
        nTest = X_test.shape[0]
        eX_test = self.test_set_construct(X_test=X_test)
        gram_test = self.get_gram_test(eX_test)
        dist_tmp = self.model.decision_function(gram_test)
        dist_matrix = dist_tmp.reshape(nTest, self.nTheta)
        accumulative_proba = expit(dist_matrix)   # cumulative P(y <= k) per threshold
        # Pad with 0 and 1, then difference the cumulative probabilities to get per-class probabilities.
        prob = np.pad(
            accumulative_proba,
            pad_width=((0, 0), (1, 1)),
            mode='constant',
            constant_values=(0, 1))
        prob = np.diff(prob)
        return prob

    def distant_to_theta(self, X_test):
        nTest = X_test.shape[0]
        eX_test = self.test_set_construct(X_test=X_test)
        gram_test = self.get_gram_test(eX_test)
        dist_tmp = self.model.decision_function(gram_test)
        dist_matrix = dist_tmp.reshape(nTest, self.nTheta)
        return dist_matrix
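

The same kind of quick check works for REDSVM; again the synthetic data is an illustrative assumption:

# Illustrative usage of REDSVM on synthetic ordinal data (assumption, not from the original post).
rng = np.random.RandomState(1)
X_demo = rng.randn(120, 2)
y_demo = np.digitize(X_demo[:, 0], bins=[-0.5, 0.5])   # ordinal labels 0/1/2

X_tr, X_te, y_tr, y_te = train_test_split(X_demo, y_demo, test_size=0.3, random_state=1, stratify=y_demo)
red_svm = REDSVM().fit(X_tr, y_tr)
y_hat = red_svm.predict(X_te)
print("REDSVM accuracy:", accuracy_score(y_te, y_hat))
print("REDSVM MAE     :", mean_absolute_error(y_te, y_hat))
print("per-class probabilities, first test sample:", np.round(red_svm.predict_proba(X_te)[0], 3))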
