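# LDA: Linear Discriminant Analysis (线性判别分析)
# Supervised dimensionality reduction; this code projects the iris data down to 2 dimensions.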
"""
Author:Deniu He
Date:2021-03-12
"""
import numpy as np
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
class LDA:
    """Linear Discriminant Analysis, computed eagerly in the constructor.

    Builds the class means, the between-class scatter ``Sb`` and the
    within-class scatter ``Sw`` (both weighted by the class proportion
    ``n_i / N``), then eigen-decomposes ``inv(Sw) @ Sb``.

    Parameters
    ----------
    X : array-like, indexed as (samples, features)
        Training samples.
    y : array-like, one label per row of ``X``
        Class labels.  NOTE(review): the code assumes ``y`` is 1-D.

    Attributes
    ----------
    N, nAtt, nClass : int
        Number of samples, features and distinct labels.
    labels : ndarray
        Sorted unique labels (``np.unique(y)``).
    nSample : list of int
        Number of samples per class, in the order of ``labels``.
    m : ndarray
        Mean of all samples.
    Mean : ndarray of shape (nClass, nAtt)
        Per-class means.
    Sb, Sw : ndarray of shape (nAtt, nAtt)
        Between-class and within-class scatter matrices.
    eigvalue, eigvector : ndarray
        Eigenvalues and (column) eigenvectors of ``inv(Sw) @ Sb``,
        ordered by descending real part of the eigenvalue.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``Sw`` is singular and cannot be inverted.
    """

    def __init__(self, X, y):
        self.X = np.asarray(X)
        self.y = np.asarray(y)
        self.N = self.X.shape[0]          # number of training samples
        self.labels = np.unique(self.y)   # distinct class labels
        self.nClass = len(self.labels)    # number of classes
        self.nSample = []                 # per-class counts, set by get_Mean()
        self.nAtt = self.X.shape[1]       # number of features (attributes)
        self.m = np.mean(self.X, axis=0)  # mean of all samples
        self.Mean = self.get_Mean()
        self.Sb = self.get_Sb()
        self.Sw = self.get_Sw()

        eigvalue, eigvector = np.linalg.eig(np.linalg.inv(self.Sw) @ self.Sb)
        # np.linalg.eig does not order its output; sort so that column 0 is
        # the most discriminative direction, column 1 the next, and so on.
        order = np.argsort(eigvalue.real)[::-1]
        self.eigvalue = eigvalue[order]
        self.eigvector = eigvector[:, order]

    def get_Mean(self):
        """Return the per-class means and refresh ``self.nSample``.

        Returns
        -------
        ndarray of shape (nClass, nAtt)
            Row ``i`` is the mean of the samples labelled ``labels[i]``.
        """
        Mean = np.zeros((self.nClass, self.nAtt))
        counts = []
        for i, lab in enumerate(self.labels):
            members = self.X[self.y == lab]
            Mean[i] = np.mean(members, axis=0)
            # Count the rows of the class itself.  len(np.where(...)) would
            # be the length of the returned index *tuple*, not the class size.
            counts.append(members.shape[0])
        # Assign instead of appending so repeated calls do not grow the list.
        self.nSample = counts
        return Mean

    def get_Sb(self):
        """Return the between-class scatter matrix.

        Each class contributes the outer product of (class mean - global
        mean) with itself, weighted by the class proportion ``n_i / N``.
        """
        Sb = np.zeros((self.nAtt, self.nAtt))
        for i in range(self.nClass):
            diff = (self.Mean[i] - self.m).reshape(-1, 1)
            Sb += (self.nSample[i] / self.N) * (diff @ diff.T)
        return Sb

    def get_Sw(self):
        """Return the within-class scatter matrix.

        Each class contributes the scatter of its samples around the class
        mean (``X_i.T @ X_i`` on centred rows), weighted by ``n_i / N``.
        """
        Sw = np.zeros((self.nAtt, self.nAtt))
        for i, lab in enumerate(self.labels):
            centred = self.X[self.y == lab] - self.Mean[i]
            Sw += (self.nSample[i] / self.N) * (centred.T @ centred)
        return Sw
if __name__ == '__main__':
    # Demo: project the 4-feature iris data onto the two leading LDA
    # directions and show the result as a scatter plot coloured by class.
    X, y = load_iris(return_X_y=True)
    lda = LDA(X=X, y=y)

    # np.linalg.eig gives no ordering guarantee, so rank the eigenvalues
    # explicitly and keep the two largest rather than trusting columns 0/1.
    top2 = np.argsort(lda.eigvalue.real)[::-1][:2]
    # eig may return a complex dtype; take the real part so the projection,
    # the printout and the scatter plot all work on real numbers.
    w = lda.eigvector[:, top2].real

    transform_X = X.dot(w)
    print(transform_X)
    plt.scatter(transform_X[:, 0], transform_X[:, 1], c=y)
    plt.show()
# Dimensionality-reduction visualization (降维展示)