import math
from collections import OrderedDict
from copy import deepcopy
from pathlib import Path
from time import time

import numpy as np
import pandas as pd
import xlwt
from mord import LogisticAT
from numpy.linalg import inv
from scipy.spatial.distance import cdist, pdist, squareform
from scipy.special import expit
from sklearn import preprocessing
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error
from sklearn.metrics.pairwise import pairwise_distances, pairwise_kernels, rbf_kernel
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import MinMaxScaler, Normalizer, StandardScaler
from sklearn.svm import SVC
from sklearn.utils.validation import check_X_y
class RED_KELM():
    """Reduction-based ordinal regression via a kernelized ELM-style solver.

    Each sample is replicated (nClass - 1) times and extended with a one-hot
    "threshold" block, turning the ordinal problem into a single regression on
    targets +/-5.  The kernel is the negative Euclidean distance on the original
    features plus the dot product of the threshold extensions.
    """

    def __init__(self):
        # NOTE(review): gamma and C are stored but never read by fit();
        # the ridge term in fit() is the hardcoded 0.1 — confirm intent.
        self.gamma = 0.1
        self.C = 100
        self.eX = None  # extended training matrix, built in fit()
        self.ey = None  # extended +/-5 targets, built in fit()

    def fit(self, X, y):
        """Fit on X (nSample, nDim) with ordinal labels y; returns self."""
        self.X = X
        self.y = y
        self.nSample, self.nDim = X.shape
        self.labels = list(np.sort(np.unique(y)))
        self.nClass = len(self.labels)
        self.nTheta = self.nClass - 1          # number of ordinal thresholds
        self.extend_part = np.eye(self.nClass - 1)
        self.label_dict = self.Get_binary_label()
        self.eX, self.ey = self.train_set_construct(X=self.X, y=self.y)
        self.gram_train = self.get_gram_train()
        # Solve (0.1*I + K) beta = ey directly; solve() is numerically
        # preferable to forming the explicit inverse.
        self.beta = np.linalg.solve(
            0.1 * np.eye(self.gram_train.shape[0]) + self.gram_train, self.ey)
        return self

    def Get_binary_label(self):
        """Map each class label to its (nClass-1) vector of +/-5 targets.

        For class index i, threshold k gets -5 when i <= k, else +5, so the
        number of +5 entries equals the class index.
        """
        label_dict = OrderedDict()
        for i, lab in enumerate(self.labels):
            tmp_label = np.ones(self.nClass - 1)
            for k in range(self.nClass - 1):
                if i <= k:
                    tmp_label[k] = -5
                else:
                    tmp_label[k] = 5
            label_dict[lab] = tmp_label
        return label_dict

    def train_set_construct(self, X, y):
        """Replicate each sample nTheta times, append one-hot threshold block."""
        eX = np.zeros((self.nSample * self.nTheta, self.nDim + self.nTheta))
        ey = np.zeros(self.nSample * self.nTheta)
        for i in range(self.nSample):
            eXi = np.hstack((np.tile(X[i], (self.nTheta, 1)), self.extend_part))
            eX[self.nTheta * i: self.nTheta * i + self.nTheta] = eXi
            ey[self.nTheta * i: self.nTheta * i + self.nTheta] = self.label_dict[y[i]]
        return eX, ey

    def test_set_construct(self, X_test):
        """Same extension as train_set_construct but without targets."""
        nTest = X_test.shape[0]
        eX = np.zeros((nTest * self.nTheta, self.nDim + self.nTheta))
        for i in range(nTest):
            eXi = np.hstack((np.tile(X_test[i], (self.nTheta, 1)), self.extend_part))
            eX[self.nTheta * i: self.nTheta * i + self.nTheta] = eXi
        return eX

    def get_gram_train(self):
        """Train kernel: -||xi - xj|| on features + dot of threshold blocks."""
        # cdist is equivalent to sklearn's pairwise_distances for euclidean
        # and keeps this class free of an sklearn dependency.
        gram_train_1 = -cdist(self.eX[:, :self.nDim], self.eX[:, :self.nDim],
                              metric="euclidean")
        gram_train_2 = self.eX[:, self.nDim:] @ self.eX[:, self.nDim:].T
        return gram_train_1 + gram_train_2

    def get_gram_test(self, eX_test):
        """Cross kernel between extended test rows and extended train rows."""
        gram_test_1 = -cdist(eX_test[:, :self.nDim], self.eX[:, :self.nDim],
                             metric="euclidean")
        gram_test_2 = eX_test[:, self.nDim:] @ self.eX[:, self.nDim:].T
        return gram_test_1 + gram_test_2

    def predict(self, X):
        """Predict class index (0..nClass-1) for each row of X.

        The index is the count of thresholds the sample exceeds (positive
        decision values).
        """
        nTest = X.shape[0]
        eX_test = self.test_set_construct(X_test=X)
        gram_test = self.get_gram_test(eX_test)
        y_extend = np.sign(gram_test @ self.beta)
        y_tmp = y_extend.reshape(nTest, self.nTheta)
        y_pred = np.sum(y_tmp > 0, axis=1)
        return y_pred

    def distant_to_theta(self, X):
        """Return the (nTest, nTheta) matrix of negated decision values."""
        nTest = X.shape[0]
        eX_test = self.test_set_construct(X_test=X)
        gram_test = self.get_gram_test(eX_test)
        dist_tmp = gram_test @ self.beta
        return -dist_tmp.reshape(nTest, self.nTheta)

    def predict_proba(self, X):
        """Return (nTest, nClass) class probabilities.

        Cumulative probabilities P(y <= k) come from a sigmoid on the scaled
        threshold distances; padding with 0 and 1 then differencing turns the
        cumulative values into per-class probabilities that sum to 1.
        """
        nTest = X.shape[0]
        eX_test = self.test_set_construct(X_test=X)
        gram_test = self.get_gram_test(eX_test)
        dist_tmp = gram_test @ self.beta
        dist_matrix = -dist_tmp.reshape(nTest, self.nTheta) * 10
        accumulative_proba = expit(dist_matrix)
        prob = np.pad(
            accumulative_proba,
            pad_width=((0, 0), (1, 1)),
            mode='constant',
            constant_values=(0, 1))
        return np.diff(prob)

    # Backward-compatible alias: the original public name had a typo.
    def prdict_proba(self, X):
        """Deprecated misspelled alias for predict_proba()."""
        return self.predict_proba(X)
class REDSVM():
    """Reduction-based ordinal regression on top of a precomputed-kernel SVC.

    Each sample is replicated (nClass - 1) times and extended with a one-hot
    "threshold" block; each replica gets a +/-1 label, reducing the ordinal
    problem to one binary SVM.  The kernel is the negative Euclidean distance
    on the original features plus the dot product of the threshold extensions.
    """

    def __init__(self):
        # self.gamma = 0.1
        self.C = 10  # SVM regularization parameter, passed to SVC in fit()
        self.eX = self.ey = None

    def fit(self, X, y):
        """Fit on X (nSample, nDim) with ordinal labels y; returns self."""
        self.X = np.asarray(X, dtype=np.float64)
        self.y = np.asarray(y, dtype=np.int32)
        self.nSample, self.nDim = X.shape
        self.labels = list(np.sort(np.unique(y)))
        self.nClass = len(self.labels)
        self.nTheta = self.nClass - 1          # number of ordinal thresholds
        self.extend_part = np.eye(self.nClass - 1)
        self.label_dict = self.get_label_dict()
        self.eX, self.ey = self.train_set_construct(X=self.X, y=self.y)
        self.gram_train = self.get_gram_train()
        # Use self.C instead of a hardcoded 10 so the attribute set in
        # __init__ actually controls the model (same default value).
        self.model = SVC(kernel='precomputed', C=self.C, probability=True)
        self.model.fit(self.gram_train, y=self.ey)
        return self

    def get_label_dict(self):
        """Map each class label to its (nClass-1) vector of +/-1 targets.

        Threshold k gets +1 when the label is <= the k-th threshold label,
        else -1, so the count of -1 entries equals the class index.
        """
        label_dict = OrderedDict()
        for i, lab in enumerate(self.labels):
            tmp_label = np.ones(self.nTheta)
            for k, pad in enumerate(self.labels[:-1]):
                if lab <= pad:
                    tmp_label[k] = 1
                else:
                    tmp_label[k] = -1
            label_dict[lab] = tmp_label
        return label_dict

    def train_set_construct(self, X, y):
        """Replicate each sample nTheta times, append one-hot threshold block."""
        eX = np.zeros((self.nSample * self.nTheta, self.nDim + self.nTheta))
        ey = np.zeros(self.nSample * self.nTheta)
        for i in range(self.nSample):
            eXi = np.hstack((np.tile(X[i], (self.nTheta, 1)), self.extend_part))
            eX[self.nTheta * i: self.nTheta * i + self.nTheta] = eXi
            ey[self.nTheta * i: self.nTheta * i + self.nTheta] = self.label_dict[y[i]]
        return eX, ey

    def test_set_construct(self, X_test):
        """Same extension as train_set_construct but without targets."""
        nTest = X_test.shape[0]
        eX = np.zeros((nTest * self.nTheta, self.nDim + self.nTheta))
        for i in range(nTest):
            eXi = np.hstack((np.tile(X_test[i], (self.nTheta, 1)), self.extend_part))
            eX[self.nTheta * i: self.nTheta * i + self.nTheta] = eXi
        return eX

    def get_gram_train(self):
        """Train kernel: -||xi - xj|| on features + dot of threshold blocks."""
        # cdist is equivalent to sklearn's pairwise_distances for euclidean.
        gram_train_1 = -cdist(self.eX[:, :self.nDim], self.eX[:, :self.nDim],
                              metric="euclidean")
        gram_train_2 = self.eX[:, self.nDim:] @ self.eX[:, self.nDim:].T
        return gram_train_1 + gram_train_2

    def get_gram_test(self, eX_test):
        """Cross kernel between extended test rows and extended train rows."""
        gram_test_1 = -cdist(eX_test[:, :self.nDim], self.eX[:, :self.nDim],
                             metric="euclidean")
        gram_test_2 = eX_test[:, self.nDim:] @ self.eX[:, self.nDim:].T
        return gram_test_1 + gram_test_2

    def predict(self, X_test):
        """Predict class index (0..nClass-1): count of -1 threshold outputs."""
        nTest = X_test.shape[0]
        eX_test = self.test_set_construct(X_test=X_test)
        gram_test = self.get_gram_test(eX_test)
        y_extend = self.model.predict(gram_test)
        y_tmp = y_extend.reshape(nTest, self.nTheta)
        y_pred = np.sum(y_tmp < 0, axis=1).astype(np.int32)
        return y_pred

    def predict_proba(self, X_test):
        """Return (nTest, nClass) probabilities from sigmoid-ed SVM margins.

        Padding the cumulative values with 0 and 1 and differencing yields
        per-class probabilities that sum to 1.
        """
        nTest = X_test.shape[0]
        eX_test = self.test_set_construct(X_test=X_test)
        gram_test = self.get_gram_test(eX_test)
        dist_tmp = self.model.decision_function(gram_test)
        dist_matrix = dist_tmp.reshape(nTest, self.nTheta)
        accumulative_proba = expit(dist_matrix)
        prob = np.pad(
            accumulative_proba,
            pad_width=((0, 0), (1, 1)),
            mode='constant',
            constant_values=(0, 1))
        return np.diff(prob)

    def distant_to_theta(self, X_test):
        """Return the (nTest, nTheta) matrix of SVM decision values."""
        nTest = X_test.shape[0]
        eX_test = self.test_set_construct(X_test=X_test)
        gram_test = self.get_gram_test(eX_test)
        dist_tmp = self.model.decision_function(gram_test)
        return dist_tmp.reshape(nTest, self.nTheta)
RED-SVM
最新推荐文章于 2024-09-27 10:11:28 发布
本文介绍了一种使用RED_KELM和REDSVM的分类算法,通过扩展特征和核技巧,实现了样本的高效表示和分类。RED_KELM利用核函数结合扩展部分,而REDSVM则将样本映射到带阈值扩展的高维空间,把有序回归问题转化为二分类问题进行处理。这两种方法在训练集构造、核矩阵计算和预测性能上进行了详细阐述。
摘要由CSDN通过智能技术生成