导入相关工具包:
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize
L2-normalization(可选):
def l2_norm(data):
    """Scale every row of ``data`` to unit L2 norm, in place.

    Args:
        data: 2-D array exposing ``shape`` and supporting slice assignment
            (presumably a NumPy feature matrix with one sample per row --
            confirm against the caller).

    Returns:
        The same ``data`` object, its rows overwritten with their
        L2-normalised values.
    """
    # A matrix with zero rows was a no-op for the original per-row loop;
    # ``normalize`` validates its input and rejects empty arrays, so keep
    # that case a no-op here as well.
    if data.shape[0] == 0:
        return data
    # ``normalize`` works row-wise by default (axis=1), so a single call on
    # the whole matrix replaces one call per row.  Writing through ``[:]``
    # preserves the in-place update that callers may rely on.
    data[:] = normalize(data, norm='l2')
    return data
# Optional step: pass each of the four feature matrices through l2_norm,
# in the same order as before, and rebind the names to the results.
(
    train_annotated_feature,
    train_candidates_feature,
    test_annotated_feature,
    test_candidates_feature,
) = map(
    l2_norm,
    (
        train_annotated_feature,
        train_candidates_feature,
        test_annotated_feature,
        test_candidates_feature,
    ),
)
用train_annotated_feature数据训练(fit)PCA模型,然后用该模型对全部四组数据(包括train_annotated_feature本身)进行降维变换:
# Fit the PCA model on the annotated training features only, then project
# all four feature sets with that single fitted model.
pca = PCA(n_components=300, copy=True, whiten=False)  # reduce to 300 dimensions
pca.fit(train_annotated_feature)
(
    pca_train_annotated_feature,
    pca_train_candidates_feature,
    pca_test_annotated_feature,
    pca_test_candidates_feature,
) = map(
    pca.transform,
    (
        train_annotated_feature,
        train_candidates_feature,
        test_annotated_feature,
        test_candidates_feature,
    ),
)