1. The Kernel Trick and the Principle of Nonlinear Dimensionality Reduction
1.1 The Mathematical Essence of Kernel Functions
Kernel function definition: K(x, y) = φ(x)ᵀφ(y), where φ maps samples into a higher-dimensional feature space. The kernel returns the inner product in that space without ever computing φ explicitly, which is what makes nonlinear dimensionality reduction tractable (see the numerical check after the list below).
Commonly used kernel functions:
- RBF kernel: K(x, y) = exp(-γ||x − y||²)
- Polynomial kernel: K(x, y) = (γxᵀy + r)^d
- Sigmoid kernel: K(x, y) = tanh(γxᵀy + r)
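To make the definition concrete, here is a minimal sketch (not from the original article) that checks numerically that scikit-learn's rbf_kernel matches the closed-form exp(-γ||x − y||²). The array X_demo, the random seed, and the gamma value are illustrative assumptions.

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.default_rng(42)
X_demo = rng.normal(size=(5, 3))   # 5 samples, 3 features (illustrative)
gamma = 0.04

# Closed-form RBF kernel: K[i, j] = exp(-gamma * ||x_i - x_j||^2)
sq_dists = ((X_demo[:, None, :] - X_demo[None, :, :]) ** 2).sum(axis=-1)
K_manual = np.exp(-gamma * sq_dists)

# Same matrix via scikit-learn's pairwise helper
K_sklearn = rbf_kernel(X_demo, gamma=gamma)

print(np.allclose(K_manual, K_sklearn))  # True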
1.2 Kernel Comparison on the Swiss Roll Dataset
import matplotlib.pyplot as plt
from sklearn.datasets import make_swiss_roll
from sklearn.decomposition import KernelPCA

X, color = make_swiss_roll(n_samples=1000, noise=0.1)

# Compare the effect of different kernels (gamma only affects rbf and sigmoid)
kernels = ['linear', 'rbf', 'sigmoid', 'cosine']
plt.figure(figsize=(15, 10))
for i, kernel in enumerate(kernels):
    kpca = KernelPCA(n_components=2, kernel=kernel, gamma=0.04)
    X_reduced = kpca.fit_transform(X)
    plt.subplot(2, 2, i + 1)
    plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=color, cmap=plt.cm.Spectral)
    plt.title(f'Kernel: {kernel.upper()}')
plt.tight_layout()
plt.show()
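Because the Swiss roll has no labels, one practical way to compare gamma values is kPCA's approximate inverse transform: project the data, map it back, and measure the reconstruction (pre-image) error. The sketch below assumes the same X from the make_swiss_roll call above; the candidate gamma values are illustrative.

from sklearn.decomposition import KernelPCA
from sklearn.metrics import mean_squared_error

# A smaller pre-image reconstruction error suggests a better-suited gamma
for gamma in [0.01, 0.04, 0.1]:   # candidate values (illustrative)
    kpca = KernelPCA(n_components=2, kernel='rbf', gamma=gamma,
                     fit_inverse_transform=True)   # enables inverse_transform
    X_reduced = kpca.fit_transform(X)
    X_preimage = kpca.inverse_transform(X_reduced)
    print(f"gamma={gamma}: reconstruction MSE = "
          f"{mean_squared_error(X, X_preimage):.4f}")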
2. Scikit-Learn in Practice: The Complete Parameter-Optimization Workflow
2.1 Hyperparameter Grid Search Template
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.decomposition import KernelPCA
from sklearn.svm import SVC

# Build a kPCA + SVM pipeline
pipe = Pipeline([
    ('kpca', KernelPCA(n_components=2)),
    ('svm', SVC())
])

# Define the parameter grid
param_grid = [{
    'kpca__kernel': ['rbf', 'sigmoid'],
    'kpca__gamma': np.linspace(0.03, 0.05, 10),
    'kpca__n_components': [2, 3],
    'svm__C': [0.1, 1, 10]
}]

# Run the grid search (X_train, y_train: a labeled training set)
grid = GridSearchCV(pipe, param_grid, cv=3)
grid.fit(X_train, y_train)

print(f"Best parameter combination: {grid.best_params_}")
print(f"Best validation accuracy: {grid.best_score_:.3f}")