from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import RFECV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

# Load the breast-cancer data as a feature matrix / target pair and hold out
# 33% of the samples for the final prediction step.
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Stage 1: feature selection.
# RFECV is driven by its own random forest (30 trees), configured with
# step=1 and cv=5 and scored by ROC AUC.
clf_featr_sele = RandomForestClassifier(
    n_estimators=30,
    random_state=42,
    class_weight="balanced",
)
rfecv = RFECV(
    estimator=clf_featr_sele,
    step=1,
    cv=5,
    scoring="roc_auc",
)

# Stage 2: final classifier.
# It does not have to match the selector's estimator; here it is a smaller
# forest (10 trees) whose max_depth is tuned by a grid search configured with
# cv=5 and ROC AUC scoring.
clf = RandomForestClassifier(
    n_estimators=10,
    random_state=42,
    class_weight="balanced",
)
CV_rfc = GridSearchCV(
    clf,
    param_grid={"max_depth": [2, 3]},
    cv=5,
    scoring="roc_auc",
)

# Chain both stages so the grid search is fitted on the output of the
# feature-selection step.
pipeline = Pipeline(
    [
        ("feature_sele", rfecv),
        ("clf_cv", CV_rfc),
    ]
)
pipeline.fit(X_train, y_train)

# Predict labels for the held-out split; the returned array is not stored.
pipeline.predict(X_test)
# sklearn: feature selection, cross-validation, hyperparameter search, and pipelines
# Latest recommended article published on 2024-02-28 10:17:06