# 单变量特征选择
from pandas import read_csv
from numpy import set_printoptions
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
# Univariate feature selection: score every input column against the class
# label with the chi-squared statistic and keep the four best-scoring columns.
# NOTE(review): the ':' inside the directory name is not a legal character in
# a Windows path component -- confirm the real folder name.
filename = 'D:/0520代码+数据/第3、4次课:代码+数据/pima_data.csv'
names = [
    'preq', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]
# The column labels are supplied explicitly via `names`.
data = read_csv(filename, names=names)
array = data.values
# The first eight columns are the inputs; the ninth ('class') is the target.
X, Y = array[:, :8], array[:, 8]
selector = SelectKBest(score_func=chi2, k=4)
fit = selector.fit(X, Y)
# Limit NumPy's printed precision to three digits before showing the scores.
set_printoptions(precision=3)
print(fit.scores_)
# Reduce X to the four selected columns and show the result.
features = fit.transform(X)
print(features)
# 递归特征消除 RFE
from pandas import read_csv
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
# Recursive feature elimination (RFE): repeatedly fit a logistic-regression
# model and discard features until only three remain, then report which
# columns were kept and how every column was ranked.
# NOTE(review): the ':' inside the directory name is not a legal character in
# a Windows path component -- confirm the real folder name.
filename = 'D:/0520代码+数据/第3、4次课:代码+数据/pima_data.csv'
names = ['preq', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = read_csv(filename, names=names)
array = data.values
# The first eight columns are the inputs; the ninth ('class') is the target.
X = array[:, 0:8]
Y = array[:, 8]
# The library default of 100 iterations is commonly too few for the solver to
# converge on unscaled inputs like these, which would make RFE rank features
# from unconverged coefficients. Raise the cap so each fit can finish.
model = LogisticRegression(max_iter=1000)
rfe = RFE(model, n_features_to_select=3)
fit = rfe.fit(X, Y)
# Number of features that were kept.
print('特征个数: ')
print(fit.n_features_)
# Boolean mask marking the kept columns.
print('备选特征: ')
print(fit.support_)
# Rank of every column (kept columns have rank 1).
print('特征排名: ')
print(fit.ranking_)
# 主成分分析PCA
from pandas import read_csv
from sklearn.decomposition import PCA
# Principal component analysis (PCA): fit a three-component projection of the
# eight input columns and print the explained-variance ratio of each component
# followed by the component vectors themselves.
# NOTE(review): the ':' inside the directory name is not a legal character in
# a Windows path component -- confirm the real folder name.
filename = 'D:/0520代码+数据/第3、4次课:代码+数据/pima_data.csv'
names = ['preq', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = read_csv(filename, names=names)
array = data.values
# Only the eight input columns are needed: the PCA fit below never uses the
# class column, so it is not extracted here.
X = array[:, 0:8]
pca = PCA(n_components=3)
fit = pca.fit(X)
# Wrap the operand in a one-element tuple so it is always formatted as a
# single value by the % operator.
print('解释方差: %s' % (fit.explained_variance_ratio_,))
print(fit.components_)
# Extra Trees classifier
from pandas import read_csv
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
# Feature importance from an extra-trees ensemble: fit the classifier on the
# eight input columns and print the importance it assigns to each one.
# NOTE(review): the ':' inside the directory name is not a legal character in
# a Windows path component -- confirm the real folder name.
filename = 'D:/0520代码+数据/第3、4次课:代码+数据/pima_data.csv'
names = [
    'preq', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]
data = read_csv(filename, names=names)
array = data.values
# The first eight columns are the inputs; the ninth ('class') is the target.
X, Y = array[:, :8], array[:, 8]
# NOTE(review): no random_state is passed, so the printed importances may
# differ from run to run -- confirm whether reproducibility matters here.
model = ExtraTreesClassifier()
fit = model.fit(X, Y)
print(fit.feature_importances_)
# Machine learning -- selecting data features
# (Web page residue: "latest recommended article published on 2024-10-03 09:02:12")