选取特征x中与标签y强相关的特征排名
def feature_SelectKBest(x, y, top_n=19):
    """Print and return the labels of the features in ``x`` that score best against ``y``.

    Every column of ``x`` is min-max scaled, columns that end up entirely NaN
    are dropped, and the remaining columns are scored against ``y`` with
    ``f_regression``. The labels of the ``top_n`` highest-scoring columns are
    printed in ascending label order, each followed by a comma, and returned.

    Args:
        x: Feature table. The code uses it as a pandas DataFrame
            (``.min()``, ``.max()``, ``.dropna()``, ``.columns``). The
            caller's object is not modified.
        y: Target values, passed unchanged to ``SelectKBest.fit``.
        top_n: How many top-scoring features to keep. Defaults to 19, the
            value that used to be hard-coded.

    Returns:
        list: Labels of the selected columns, sorted ascending.
    """
    # Min-max scale each column. A constant column becomes 0/0 = NaN in every
    # row, which is what the dropna below removes.
    x = (x - x.min()) / (x.max() - x.min())

    # Drop only the columns that are NaN in every row; columns with some NaN
    # values are kept as they are.
    # `thresh` must not be passed together with `how`: recent pandas raises
    # TypeError when both are given, even if `thresh` is None.
    x = x.dropna(axis=1, how="all")

    # Only the per-feature scores are used, never the selector's own top-k
    # mask, so k="all" avoids a failure when x has fewer than k columns.
    selector = SelectKBest(score_func=f_regression, k="all")
    selector.fit(x, y)

    # One row per feature: its column label and its score.
    feature_scores = pd.DataFrame({"Specs": x.columns, "Score": selector.scores_})

    # Keep the top_n best scores, then order the surviving labels ascending.
    # sort_values is used instead of nsmallest because nsmallest rejects
    # object-dtype (e.g. string) labels.
    best_labels = feature_scores.nlargest(top_n, "Score")["Specs"].sort_values()
    selected = best_labels.tolist()

    # Output recorded by the original author for their data (19 labels):
    # 2,7,8,9,11,16,17,18,20,22,23,25,26,27,28,33,38,54,55
    for label in selected:
        print(label, end=",")

    return selected
输出为:
2,7,8,9,11,16,17,18,20,22,23,25,26,27,28,33,38,54,55,