# -*- coding: UTF-8 -*-
import numpy as np
from sklearn import cross_validation  # NOTE: removed in sklearn >= 0.20; kept only for old environments
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score
###########################################
# Chi-squared (chi2) feature selection: keep the `select` highest-scoring
# features, re-append the label column, and save the reduced data set.
e14601 = np.loadtxt(open("/home/zlf/Documents/e14601_data_set_n0.csv", "rb"),
                    delimiter=",", skiprows=0)
a = e14601.shape
print(a[0], a[1])
data = e14601[:, 0:a[1] - 1]   # every column except the last is a feature
label = e14601[:, a[1] - 1]    # last column is the class label
select = 48                    # number of features to keep
# `k` must be passed by keyword: it is keyword-only in modern sklearn, so
# the old positional call SelectKBest(chi2, select) raises a TypeError.
data_new = SelectKBest(chi2, k=select).fit_transform(data, label)
print(data_new.shape)
# Stack the selected features with the label column (same result as
# filling a pre-allocated zeros array column-by-column).
e14601_new = np.column_stack((data_new, label))
print(e14601_new.shape)
np.savetxt('/home/zlf/Documents/e14601_data_set_n0_select.csv', e14601_new, delimiter=',')
###########################################
# Model-based feature ranking: score each feature on its own with a small
# random forest under shuffle-split cross-validation, then keep the 48
# best-scoring features (plus the label) and save them.
e14601 = np.loadtxt(open("/home/zlf/Documents/e14601_data_set.csv", "rb"),
                    delimiter=",", skiprows=0)
a = e14601.shape
data = e14601[:, 0:a[1] - 1]   # feature columns
label = e14601[:, a[1] - 1]    # class label column
rf = RandomForestClassifier(n_estimators=20, max_depth=4)
# The legacy cross_validation.ShuffleSplit(n, n_iter=...) API was removed;
# the model_selection version takes n_splits and infers the sample count.
# Hoisted out of the loop: the same splitter can be reused for every feature.
cv = ShuffleSplit(n_splits=3, test_size=0.3, random_state=0)
scores = []
# Fit on each single feature: X[:, i:i+1] keeps the 2-D (n_samples, 1)
# shape sklearn expects, unlike the 1-D X[:, i].
for i in range(a[1] - 1):
    score = cross_val_score(rf, data[:, i:i+1], label, scoring="accuracy", cv=cv)
    scores.append((format(np.mean(score), '.3f'), i))
# Sorting the fixed-width '0.xxx' strings descending orders the features
# by accuracy, best first.
A = sorted(scores, reverse=True)
print(A)
print(A[0][1])  # column index of the single best feature
select = 48     # number of top-ranked features to keep
e14601_new = np.zeros([a[0], select + 1])
print(e14601_new.shape)
for i in range(select):
    e14601_new[:, i] = e14601[:, A[i][1]]  # copy the i-th best feature column
e14601_new[:, select] = label
print(e14601_new[:, 0])
np.savetxt('/home/zlf/Documents/e14601_data_set_n0_select0.csv', e14601_new, delimiter=',')
# (Blog-scrape residue removed: the page title "python feature selection" and a
#  publish-date footer were captured here — they are not part of the script.)