先亮代码
import pandas as pd
from sklearn.feature_selection import RFECV
from sklearn.metrics import *
from sklearn.model_selection import StratifiedKFold
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
# --- Load data ---------------------------------------------------------------
# Training set and test set come from two separate CSV files.
data = pd.read_csv("./data/sensoringData_feature_prepared_20_19.0_2.csv", header=0)
test_data = pd.read_csv('./data/feature_df_list_400_total_fil_w50_ella_js_june24_july9_new_july20.csv')

# Drop the index column that was saved with the CSV, and align the label
# column name ('type') with the one used by the training data ('activity').
del test_data['Unnamed: 0']
test_data = test_data.rename(columns={'type': 'activity'})

# Restrict the training frame to the test frame's columns, in the same order,
# so both sets expose an identical feature layout.
data = data[test_data.columns.tolist()]

# NOTE(review): the label is taken to be the LAST column of both frames;
# confirm that 'activity' really is the final column of the test CSV.
train_X = data.iloc[:, :-1]

# Encode the activity names as string class codes. A single non-inplace
# replace with a mapping avoids mutating a slice of `data` in place.
label_codes = {'Walking': '0', 'Inactive': '1', 'Active': '2', 'Driving': '3'}
train_y = data.iloc[:, -1].replace(label_codes)

test_X = test_data.iloc[:, :-1]
# NOTE(review): test labels are NOT mapped through `label_codes` here; verify
# they already use the same encoding before comparing against predictions.
test_y = test_data.iloc[:, -1]

# --- Feature scaling ---------------------------------------------------------
# Fit the scaler on the training features only, then apply the same
# transformation to the test features.
scaler = StandardScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.transform(test_X)

# --- Estimator ---------------------------------------------------------------
# Linear SVC wrapped in a one-vs-rest multiclass classifier.
# `gamma` only affects the 'rbf', 'poly' and 'sigmoid' kernels, so it has no
# effect with kernel="linear"; it is kept for when the kernel is changed.
# NOTE(review): RFECV needs per-feature importances from the estimator; confirm
# that the OneVsRestClassifier wrapper exposes `coef_` in the installed
# scikit-learn version.
svc = OneVsRestClassifier(SVC(kernel="linear", max_iter=10000, C=10000, gamma=0.01))

# --- Custom scorer (the main point of this post) -----------------------------
# make_scorer turns a metric function into a callable scorer object.
# `average` accepts: None, 'binary' (default), 'micro', 'macro', 'samples',
# 'weighted'.
my_scorer = make_scorer(precision_score, average='weighted')

# RFECV's `scoring` parameter: string, callable or None, optional
# (default=None). A string (see the model evaluation documentation) or a
# scorer callable object / function with signature scorer(estimator, X, y).

# --- Recursive feature elimination with cross-validation ---------------------
# StratifiedKFold: stratified K-fold cross-validation; the argument is the
# number of folds.
# RFECV: feature ranking with recursive feature elimination and
# cross-validated selection of the best number of features.
rfecv = RFECV(
    estimator=svc,
    step=1,
    cv=StratifiedKFold(10),
    scoring=my_scorer,  # pass the custom callable scorer object
)
rfecv.fit(train_X, train_y)
RFECV 类的属性
Attributes
n_features_: int
The number of selected features with cross-validation.
support_: array of shape [n_features]
The mask of selected features.
ranking_: array of shape [n_features]
The feature ranking, such that ranking_[i] corresponds to the ranking position of the i-th feature. Selected (i.e., estimated best) features are assigned rank 1.
grid_scores_: array of shape [n_subsets_of_features]
The cross-validation scores such that grid_scores_[i] corresponds to the CV score of the i-th subset of features. (注意:该属性在 scikit-learn 1.0 中已被弃用,并在 1.2 中移除;新版本请改用 cv_results_。)
estimator_: object
The external estimator fit on the reduced dataset.