模型原型
class sklearn.decomposition.KernelPCA(n_components=None, kernel='linear', gamma=None, degree=3, coef0=1, kernel_params=None, alpha=1.0, fit_inverse_transform=False, eigen_solver='auto', tol=0, max_iter=None, remove_zero_eig=False)
参数
- n_components
- kernel:指定核函数
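- 'linear':线性核 $K(\vec{x},\vec{z})=\vec{x}\cdot\vec{z}$
- 'poly':多项式核 $K(\vec{x},\vec{z})=(\gamma(\vec{x}\cdot\vec{z})+r)^{p}$,其中 $\gamma$ 由gamma指定,$p$ 由degree指定,$r$ 由coef0指定
- 'rbf':高斯核函数 $K(\vec{x},\vec{z})=\exp(-\gamma\|\vec{x}-\vec{z}\|^{2})$
- 'sigmoid':$K(\vec{x},\vec{z})=\tanh(\gamma(\vec{x}\cdot\vec{z})+r)$
- 'precomputed':表示提供kernel matrix
- 一个可调用对象:用于计算kernel matrix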
- gamma
- 核函数为’rbf’,’poly’,’sigmoid’时:核函数的系数
- None(默认值):系数为1/n_features
- degree:当核函数是多项式核时,指定多项式的次数,对于其他核函数无效
- coef0:指定函数中的自由项(当核函数是’poly’和’sigmoid’有效时才使用它)
- kernel_params:当核函数是可调用对象时才使用它
- alpha:岭回归的超参数,用于计算逆转换矩阵(当fit_inverse_transform=True时有效)
- fit_inverse_transform:当为True时,用于计算逆转换矩阵
- eigen_solver:指定求解特征值的算法
- ’auto’:自动选择
- ‘dense’:dense特征值求解器
- ‘arpack’:arpack特征值求解器,用于特征数量远小于样本数量的情形
- tol:指定arpack特征值求解器的收敛阈值(0表示自动选择阈值)
- max_iter:指定arpack特征值求解器的最大迭代次数(None表示自动选择)
- remove_zero_eig:如果为True则会移除所有为零的特征值;n_components=None也会移除所有为零的特征值
属性
- lambdas_:核化矩阵的特征值(scikit-learn 1.0起更名为eigenvalues_)
- alphas_:核化矩阵的特征向量(scikit-learn 1.0起更名为eigenvectors_)
- dual_coef_:逆转换矩阵
方法
- fit(X[,y])
- transform(X)
- fit_transform(X[,y])
- inverse_transform(X)
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets,decomposition,manifold
加载数据
def load_data():
    """Load the iris dataset that ships with scikit-learn.

    Returns:
        A ``(data, target)`` pair read from the object returned by
        ``datasets.load_iris()``.
    """
    bunch = datasets.load_iris()
    features, labels = bunch.data, bunch.target
    return features, labels
使用KernelPCA类
def test_KPCA(*data):
    """Fit KernelPCA with several kernels and print the resulting eigenvalues.

    Args:
        *data: Exactly two positional values, ``(X, y)``. Only ``X`` is
            passed to ``KernelPCA.fit``; ``y`` is accepted so the call
            signature matches the plotting helpers and is otherwise unused.

    The function prints one line per kernel and returns ``None``.
    """
    X, _ = data
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    for kernel in kernels:
        kpca = decomposition.KernelPCA(n_components=None, kernel=kernel)
        kpca.fit(X)
        # scikit-learn 1.0 renamed ``lambdas_`` to ``eigenvalues_`` and 1.2
        # removed the old name, so prefer the new attribute and fall back to
        # the legacy one only on releases that predate the rename.
        eigenvalues = getattr(kpca, 'eigenvalues_', None)
        if eigenvalues is None:
            eigenvalues = kpca.lambdas_
        print('kernel=%s --> lambdas:%s ' % (kernel, eigenvalues))
# Load the samples and labels once; the later plotting calls reuse X and y.
X,y=load_data()
# Print the per-kernel values reported by test_KPCA.
test_KPCA(X,y)
降维后样本分布图
def polt_KPCA(*data):
    """Scatter-plot the two-component KernelPCA projection for four kernels.

    Args:
        *data: Exactly two positional values, ``(X, y)``. ``X`` is fitted and
            transformed by ``KernelPCA``; ``y`` holds the labels used to
            colour the points (one colour per distinct value).

    One figure with a 2x2 grid of subplots is shown, one subplot per kernel.
    The function returns ``None``.
    """
    X, y = data
    # One RGB colour per distinct label; zip() below stops at the shorter
    # of the label list and this palette.
    palette = (
        (1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
        (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
        (0.5, 0.3, 0.2),
    )
    figure = plt.figure()
    for slot, kernel in enumerate(('linear', 'poly', 'rbf', 'sigmoid'), start=1):
        model = decomposition.KernelPCA(n_components=2, kernel=kernel)
        model.fit(X)
        projected = model.transform(X)
        axes = figure.add_subplot(2, 2, slot)
        for label, color in zip(np.unique(y), palette):
            mask = y == label  # boolean selector for the rows of this label
            axes.scatter(
                projected[mask, 0],
                projected[mask, 1],
                label='target=%d' % label,
                color=color,
            )
        axes.set_xlabel('X[0]')
        axes.set_ylabel('X[1]')
        axes.legend(loc='best')
        axes.set_title('kernel=%s' % kernel)
    plt.suptitle('KPCA')
    plt.show()
# Draw the per-kernel projection plots for the loaded data.
polt_KPCA(X,y)
多项式核的参数影响
def plot_KPC_poly(*data):
    """Show how degree, gamma and coef0 change the polynomial-kernel projection.

    Args:
        *data: Exactly two positional values, ``(X, y)``. ``X`` is fitted and
            transformed by ``KernelPCA``; ``y`` holds the labels used to
            colour the points.

    One figure with a 2x4 grid of subplots is shown, one subplot per
    ``(degree, gamma, coef0)`` combination. The function returns ``None``.
    """
    X, y = data
    fig = plt.figure()
    colors = (
        (1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
        (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
        (0.5, 0.3, 0.2),
    )
    # Each entry is (degree p, gamma, coef0 r).
    params = [
        (3, 1, 1), (3, 10, 1), (3, 1, 10), (3, 10, 10),
        (10, 1, 1), (10, 10, 1), (10, 1, 10), (10, 10, 10),
    ]
    labels = np.unique(y)
    for i, (p, gamma, r) in enumerate(params):
        kpca = decomposition.KernelPCA(
            n_components=2, kernel='poly', gamma=gamma, degree=p, coef0=r
        )
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 4, i + 1)
        for label, color in zip(labels, colors):
            position = y == label
            ax.scatter(
                X_r[position, 0],
                X_r[position, 1],
                label='target=%d' % label,
                color=color,
            )
        ax.set_xlabel("X[0]")
        # Hide the tick marks; only the relative layout matters here.
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel('X[1]')
        ax.legend(loc='best')
        # scikit-learn's polynomial kernel is (gamma * <x, z> + coef0) ** degree;
        # the title must show that form, without an extra "+1" inside.
        ax.set_title(r'$(%s(x \cdot z)+%s)^{%s}$' % (gamma, r, p))
    plt.suptitle('KPCA-Poly')
    plt.show()
# Draw the polynomial-kernel parameter comparison for the loaded data.
plot_KPC_poly(X,y)
高斯核的参数影响
def plot_KPCA_rbf(*data):
    """Show how gamma changes the RBF-kernel KernelPCA projection.

    Args:
        *data: Exactly two positional values, ``(X, y)``. ``X`` is fitted and
            transformed by ``KernelPCA``; ``y`` holds the labels used to
            colour the points.

    One figure with a 2x2 grid of subplots is shown, one subplot per gamma
    value. The function returns ``None``.
    """
    X, y = data
    fig = plt.figure()
    colors = (
        (1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
        (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
        (0.5, 0.3, 0.2),
    )
    gammas = [0.5, 1, 4, 10]
    labels = np.unique(y)
    for i, gamma in enumerate(gammas):
        kpca = decomposition.KernelPCA(n_components=2, kernel='rbf', gamma=gamma)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(labels, colors):
            position = y == label
            ax.scatter(
                X_r[position, 0],
                X_r[position, 1],
                label='target=%d' % label,
                color=color,
            )
        ax.set_xlabel('X[0]')
        # Hide the tick marks; only the relative layout matters here.
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel('X[1]')
        ax.legend(loc='best')
        # Both "$" delimiters are required: with only the closing one the
        # string is not treated as mathtext and the raw TeX is drawn as-is.
        ax.set_title(r'$\exp(-%s||x-z||^2)$' % gamma)
    plt.suptitle('KPCA-rbf')
    plt.show()
# Draw the RBF-kernel gamma comparison for the loaded data.
plot_KPCA_rbf(X,y)
sigmoid核的参数影响
def plot_KPCA_sigmoid(*data):
    """Show how gamma and coef0 change the sigmoid-kernel KernelPCA projection.

    Args:
        *data: Exactly two positional values, ``(X, y)``. ``X`` is fitted and
            transformed by ``KernelPCA``; ``y`` holds the labels used to
            colour the points.

    One figure with a 3x2 grid of subplots is shown, one subplot per
    ``(gamma, coef0)`` pair. The function returns ``None``.
    """
    X, y = data
    figure = plt.figure()
    palette = (
        (1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
        (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
        (0.5, 0.3, 0.2),
    )
    # Each entry is (gamma, coef0 r).
    settings = [(0.01, 0.1), (0.01, 0.2), (0.1, 0.1), (0.1, 0.2), (0.2, 0.1), (0.2, 0.2)]
    for slot, (gamma, r) in enumerate(settings, start=1):
        model = decomposition.KernelPCA(
            n_components=2, kernel='sigmoid', gamma=gamma, coef0=r
        )
        model.fit(X)
        projected = model.transform(X)
        axes = figure.add_subplot(3, 2, slot)
        for label, color in zip(np.unique(y), palette):
            mask = y == label  # boolean selector for the rows of this label
            axes.scatter(
                projected[mask, 0],
                projected[mask, 1],
                label='target=%d' % label,
                color=color,
            )
        axes.set_xlabel('X[0]')
        # Hide the tick marks; only the relative layout matters here.
        axes.set_xticks([])
        axes.set_yticks([])
        axes.set_ylabel('X[1]')
        axes.legend(loc='best')
        axes.set_title(r'$tanh(%s(x\cdot z)+%s)$' % (gamma, r))
    plt.suptitle('KPCA-sigmoid')
    plt.show()
# Draw the sigmoid-kernel parameter comparison for the loaded data.
plot_KPCA_sigmoid(X,y)