import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
# Read the training table: the header row is skipped and only the columns at
# zero-based positions 1-5 are kept (np.loadtxt parses them as floats by default).
iris_data = np.loadtxt(
    'iris_train_data.csv',
    delimiter=',',
    skiprows=1,
    usecols=[1, 2, 3, 4, 5],
)
# The first four kept columns form the sample matrix; the fifth holds the labels.
iris, labels = iris_data[:, :4], iris_data[:, 4]
# Compute LDA by hand
def scatter_matrix(x):
    """Return the scatter matrix ``x.T @ x`` of the rows of *x*.

    Args:
        x: 2-D NumPy array with one sample per row. No centring is done
            here; callers subtract whichever mean they need beforehand.

    Returns:
        The ``(n_columns, n_columns)`` array equal to the sum of the outer
        products of the rows of *x* with themselves.
    """
    return x.T.dot(x)
# Accumulate the within-class (Sw) and between-class (Sb) scatter matrices,
# both normalised by the total number of samples.
n_features = iris.shape[1]
Sw = np.zeros((n_features, n_features))
Sb = np.zeros((n_features, n_features))
type_labels = np.unique(labels)
total_mean = iris.mean(axis=0)
total_sample = iris.shape[0]
for i_label in type_labels:
    t_data = iris[labels == i_label, :]
    t_data_mean = t_data.mean(axis=0)
    n_data = t_data.shape[0]
    # scatter_matrix() already sums over the n_data rows of this class, so it
    # only has to be divided by the sample count. Multiplying by
    # n_data / total_sample as well would weight each class by n_data**2 and
    # skew the directions whenever the classes are not the same size.
    Sw += scatter_matrix(t_data - t_data_mean) / total_sample
    # A single mean-difference row carries no class-size information, so the
    # class proportion is applied explicitly here.
    Sb += n_data / total_sample * scatter_matrix((t_data_mean - total_mean).reshape(1, -1))

# Eigen-decompose Sw^-1 Sb; solve() gives the same product without forming the
# explicit inverse.
lda2_eig_vals, lda2_eig_vecs = np.linalg.eig(np.linalg.solve(Sw, Sb))
# Sw^-1 Sb is not symmetric, so eig() may return a complex dtype whose
# imaginary parts are only round-off; keep the real parts so the sort and the
# projection below stay real-valued.
lda2_eig_vals = lda2_eig_vals.real
lda2_eig_vecs = lda2_eig_vecs.real
# Keep the eigenvectors belonging to the two largest eigenvalues.
index = np.argsort(-lda2_eig_vals)
W = lda2_eig_vecs[:, index[:2]]
# Project the (uncentred) samples onto the two discriminant directions.
# NOTE(review): eig() returns unit-length vectors with arbitrary sign, so this
# is expected to agree with another LDA implementation only up to a per-axis
# sign and scale - confirm when comparing numbers.
lda_x2d_hand = iris.dot(W)
# Verify against scikit-learn
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Reference projection: scikit-learn's LDA with the eigen solver, reduced to
# two components and fitted on the same samples and labels.
lda = LinearDiscriminantAnalysis(solver='eigen', n_components=2)
lda_x2d_sklearn = lda.fit_transform(iris, labels)

# (caption, projected samples, scatter marker) for each of the two results.
projections = (
    ('sklearn', lda_x2d_sklearn, '^'),
    ('hand', lda_x2d_hand, 'o'),
)

# Print the first five projected rows of each result under a dotted header.
for caption, projected, _ in projections:
    print(caption + '.' * 30)
    print(projected[:5, :])

# One figure per result, with points coloured by their label.
for figure_no, (caption, projected, marker) in enumerate(projections, start=1):
    plt.figure(figure_no)
    plt.scatter(projected[:, 0], projected[:, 1], marker=marker, c=labels)
    plt.title(caption)
plt.show()
sklearn..............................
[[-2.10354239 2.00968647]
[-0.51738942 1.36909346]
[-1.59470436 1.71237822]
[ 1.35641099 1.63078232]
[ 1.53894518 2.16436611]]
hand..............................
[[-2.15491766 -2.04210175]
[-0.54982651 -1.39581354]
[-1.64033069 -1.74167954]
[ 1.32989802 -1.66412388]
[ 1.50672047 -2.20368217]]