'''
Author: DeniuHe
Date:2021-03-16
'''
import numpy as np
from sklearn import datasets
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.decomposition import KernelPCA
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
class Kernel_PCA():
    """Kernel PCA with an RBF kernel, fitted eagerly when constructed.

    Construction builds the RBF Gram matrix of ``X``, double-centres it and
    eigendecomposes the centred matrix. ``DR`` then projects the training
    samples onto the leading components.

    Attributes:
        X: the training samples passed to the constructor.
        N: number of training samples (``X.shape[0]``).
        gamma: RBF kernel coefficient used to build ``K``.
        K: (N, N) RBF Gram matrix of ``X``.
        In: (N, N) matrix whose entries are all ``1 / N``.
        K_hat: (N, N) double-centred Gram matrix.
        L: real eigenvalues of ``K_hat``.
        U: matching eigenvectors of ``K_hat``, stored as columns.
    """

    def __init__(self, X, gamma=15):
        """Fit the decomposition on ``X``.

        Args:
            X: 2-D array of samples, one per row, as accepted by
                ``sklearn.metrics.pairwise.rbf_kernel``.
            gamma: RBF kernel coefficient forwarded to ``rbf_kernel``.
                Defaults to 15, the value this class previously hard-coded.
        """
        self.X = X
        self.N = self.X.shape[0]
        self.gamma = gamma
        self.K = rbf_kernel(X=self.X, gamma=gamma)
        self.In = np.ones((self.N, self.N)) * (1 / self.N)
        # Double centring: remove row means and column means, add back the
        # grand mean.
        self.K_hat = (
            self.K
            - self.K @ self.In
            - self.In @ self.K
            + self.In @ self.K @ self.In
        )
        # K_hat is symmetric, so use the symmetric solver: it always returns
        # real eigenvalues and orthonormal eigenvectors, whereas the general
        # `eig` solver may return complex values caused by round-off.
        self.L, self.U = np.linalg.eigh(self.K_hat)

    # dimensionality reduction
    def DR(self, n_component):
        """Project the training samples onto the leading components.

        Args:
            n_component: number of components to keep, taken in order of
                decreasing eigenvalue.

        Returns:
            Array of shape (N, n_component). Column ``j`` is
            ``K_hat @ u_j / sqrt(lambda_j)`` for the j-th largest eigenvalue.
            The sign of each column is arbitrary, and a selected eigenvalue
            that is zero or negative produces non-finite entries.

        Raises:
            IndexError: if ``n_component`` exceeds the number of samples.
        """
        descending = np.argsort(self.L)[::-1]
        # Index with an integer array rather than a slice so that asking for
        # more components than exist raises instead of silently truncating.
        top = descending[np.arange(n_component)]
        return self.K_hat @ self.U[:, top] / np.sqrt(self.L[top])
if __name__ == '__main__':
    # Demo on the two-moons toy data set: plot the 2-D embedding from the
    # hand-written Kernel_PCA, then the one from scikit-learn's KernelPCA
    # with the same gamma (15) for a visual comparison.
    X, y = datasets.make_moons(noise=0.1, n_samples=300, random_state=100)

    own_model = Kernel_PCA(X=X)
    own_embedding = own_model.DR(n_component=2)
    plt.scatter(own_embedding[:, 0], own_embedding[:, 1], c=y)
    plt.show()

    # The data set is regenerated with the same seed, so both models see
    # identical samples.
    X, y = datasets.make_moons(noise=0.1, n_samples=300, random_state=100)
    reference_model = KernelPCA(kernel='rbf', n_components=2, gamma=15)
    reference_embedding = reference_model.fit_transform(X)
    plt.scatter(reference_embedding[:, 0], reference_embedding[:, 1], c=y)
    plt.show()
两个月亮
本文代码效果 (gamma=15)
sklearn调包效果(参数保持相同,即gamma=15)
结果的方向(正负号)看起来与 sklearn 相反,不过不要紧:特征向量的符号本身是任意的(若 u 是特征向量,则 -u 也是),某个主成分整体取反并不改变降维结果的结构。
完整版 :
'''
Author: DeniuHe
Date:2021-03-16
'''
import numpy as np
from sklearn import datasets
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.decomposition import KernelPCA
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
class Kernel_PCA():
    """Kernel PCA with an RBF kernel, including out-of-sample projection.

    Construction builds the RBF Gram matrix of ``X``, double-centres it and
    eigendecomposes the centred matrix. ``DR`` projects the training samples;
    ``DR_for_test_instance`` projects new samples onto the same components.

    Attributes:
        X: the training samples passed to the constructor.
        N: number of training samples (``X.shape[0]``).
        gamma: RBF kernel coefficient used to build ``K``.
        K: (N, N) RBF Gram matrix of ``X``.
        In: (N, N) matrix whose entries are all ``1 / N``.
        K_hat: (N, N) double-centred Gram matrix.
        L: real eigenvalues of ``K_hat``.
        U: matching eigenvectors of ``K_hat``, stored as columns.
    """

    # Sentinel default meaning "reuse the gamma the model was fitted with".
    # A sentinel is used instead of None because None is itself a value that
    # rbf_kernel accepts for gamma.
    _FIT_GAMMA = object()

    def __init__(self, X, gamma=None):
        """Fit the decomposition on ``X``.

        Args:
            X: 2-D array of samples, one per row, as accepted by
                ``sklearn.metrics.pairwise.rbf_kernel``.
            gamma: RBF kernel coefficient forwarded to ``rbf_kernel``;
                ``None`` leaves the choice to ``rbf_kernel``.
        """
        self.X = X
        self.N = self.X.shape[0]
        self.gamma = gamma
        self.K = rbf_kernel(X=self.X, gamma=gamma)
        self.In = np.ones((self.N, self.N)) * (1 / self.N)
        # Double centring: remove row means and column means, add back the
        # grand mean.
        self.K_hat = (
            self.K
            - self.K @ self.In
            - self.In @ self.K
            + self.In @ self.K @ self.In
        )
        # K_hat is symmetric, so use the symmetric solver: it always returns
        # real eigenvalues and orthonormal eigenvectors, whereas the general
        # `eig` solver may return complex values caused by round-off.
        self.L, self.U = np.linalg.eigh(self.K_hat)

    def _top_indices(self, n_component):
        """Return indices of the ``n_component`` largest eigenvalues, largest first."""
        descending = np.argsort(self.L)[::-1]
        # Index with an integer array rather than a slice so that asking for
        # more components than exist raises instead of silently truncating.
        return descending[np.arange(n_component)]

    def _project(self, K_centered, n_component):
        """Project rows of a centred kernel matrix onto the leading components."""
        top = self._top_indices(n_component)
        return K_centered @ self.U[:, top] / np.sqrt(self.L[top])

    # dimensionality reduction
    def DR(self, n_component):
        """Project the training samples onto the leading components.

        Args:
            n_component: number of components to keep, taken in order of
                decreasing eigenvalue.

        Returns:
            Array of shape (N, n_component). The sign of each column is
            arbitrary, and a selected eigenvalue that is zero or negative
            produces non-finite entries.

        Raises:
            IndexError: if ``n_component`` exceeds the number of samples.
        """
        return self._project(self.K_hat, n_component)

    def DR_for_test_instance(self, X, n_component, gamma=_FIT_GAMMA):
        """Project unseen samples onto the components fitted at construction.

        Args:
            X: 2-D array of new samples, one per row, with the same number of
                columns as the training data.
            n_component: number of components to keep, taken in order of
                decreasing eigenvalue.
            gamma: RBF kernel coefficient for the test/train kernel. When
                omitted, the value given to the constructor is reused; a
                different value makes the projection inconsistent with the
                fitted components.

        Returns:
            Array of shape (X.shape[0], n_component).

        Raises:
            IndexError: if ``n_component`` exceeds the number of training
                samples.
        """
        if gamma is self._FIT_GAMMA:
            gamma = self.gamma
        n_test = X.shape[0]
        # (N, n_test) matrix of 1/N: averages over the training samples.
        test_avg = np.ones((self.N, n_test)) * (1 / self.N)
        # Rows index test samples, columns index training samples.
        K_test = rbf_kernel(X=self.X, Y=X, gamma=gamma).T
        # Centre the test kernel with the training-set means so that it lives
        # in the same centred feature space as K_hat.
        K_test_hat = (
            K_test
            - K_test @ self.In
            - test_avg.T @ self.K
            + test_avg.T @ self.K @ self.In
        )
        return self._project(K_test_hat, n_component)
if __name__ == '__main__':
    # Demo on concentric circles: fit kernel PCA on a training split, plot
    # the training embedding, then project the held-out samples with the
    # fitted model and plot those.
    X, y = datasets.make_circles(noise=0.05, factor=0.3, n_samples=300, random_state=100)
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.show()

    # One kernel width shared by fitting and out-of-sample projection.
    gamma = 9

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=100)
    kpca = Kernel_PCA(X=X_train, gamma=gamma)
    trans_X = kpca.DR(n_component=2)
    plt.scatter(trans_X[:, 0], trans_X[:, 1], c=y_train)
    plt.show()

    # The test kernel must use the same gamma as the fit; the previous
    # hard-coded 10 projected the test points with a different kernel.
    trans_X_test = kpca.DR_for_test_instance(X=X_test, n_component=2, gamma=gamma)
    plt.scatter(trans_X_test[:, 0], trans_X_test[:, 1], c=y_test)
    plt.show()
kernel PCA 公式推导 推荐博文: