# Include data preprocessing in the model selection process (将数据预处理加入模型选择过程)
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# Seed NumPy's global random number generator.
np.random.seed(0)

# Load the iris dataset: feature matrix and class labels.
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Preprocessing step: FeatureUnion runs both transformers on the input and
# concatenates their outputs (standardized features + PCA components).
preprocess = FeatureUnion([("std", StandardScaler()), ("pca", PCA())])

# Full pipeline: preprocessing followed by the classifier.
# The default "lbfgs" solver only supports the l2 penalty, so every "l1"
# candidate in the grid below would fail to fit and be scored as NaN.
# "saga" supports both l1 and l2. max_iter is raised because saga can need
# many iterations when regularization is weak (large C), and random_state is
# pinned so results do not depend on the RNG state of worker processes.
pipe = Pipeline([
    ("preprocess", preprocess),
    ("classifier", LogisticRegression(solver="saga", max_iter=5000,
                                      random_state=0)),
])

# Candidate hyperparameters. Keys use the "<step>__<param>" convention, so
# the number of PCA components is tuned together with the classifier settings.
search_space = [{
    "preprocess__pca__n_components": [1, 2, 3],
    "classifier__penalty": ["l1", "l2"],
    "classifier__C": np.logspace(0, 4, 10),
}]

# Exhaustive 5-fold cross-validated search over the grid, using all cores.
clf = GridSearchCV(pipe, search_space, cv=5, verbose=0, n_jobs=-1)

# fit() returns the fitted GridSearchCV object itself.
best_model = clf.fit(features, target)

# Report the number of PCA components chosen for the best pipeline.
# (A bare expression only displays in a notebook/REPL, so print explicitly.)
print(best_model.best_estimator_.get_params()["preprocess__pca__n_components"])