# encoding:utf-8
"""Three small scikit-learn feature-selection demos.

1. ``VarianceThreshold`` on a toy 0/1 matrix.
2. ``SelectKBest`` scored with ``chi2`` on the iris data.
3. ``SelectFromModel`` driven by a fitted random forest on the iris data.

Each demo prints its input and/or the selected features to stdout.
"""
# Keeps print() a function on Python 2 as well; has no effect on Python 3.
from __future__ import print_function

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import chi2

# --- 1. Variance threshold -------------------------------------------------
# Six samples, each a 3-dimensional 0/1 feature vector.
X = [[0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 1, 0], [0, 1, 1]]
# The threshold is the Bernoulli variance p * (1 - p) evaluated at p = 0.8,
# i.e. 0.16.  The column variances here are 5/36, 2/9 and 1/4, so only the
# first column falls below the threshold.
sel = VarianceThreshold(threshold=(.8 * (1 - .8)))
X = sel.fit_transform(X)
# Was the Python 2 statement `print X`, which is a SyntaxError on Python 3.
print(X)

# --- 2. Univariate selection with the chi-squared score --------------------
iris = load_iris()
X, y = iris.data, iris.target
print('原始特征:')
print(X.shape)
print(X[:5, :])
print()
# Keep the 2 features with the highest chi2 score.
X_new = SelectKBest(chi2, k=2).fit_transform(X, y)
print('选取的特征:')
print(X_new.shape)
print(X_new[:5, :])

# --- 3. Model-based selection with a random forest -------------------------
# X and y still hold the iris data loaded above (fit_transform returned a
# new array into X_new), so the dataset is not loaded a second time.
print('原始特征:')
print(X.shape)
print(X[:5, :])
# NOTE(review): no random_state is passed, so the printed importances (and
# possibly the selected columns) can differ between runs.
clf = RandomForestClassifier()
clf = clf.fit(X, y)
print('特征得分:')
print(clf.feature_importances_)
# prefit=True: reuse the forest fitted above instead of refitting it.
model = SelectFromModel(clf, prefit=True)
X_new = model.transform(X)
print('选取的特征:')
print(X_new.shape)
print(X_new[:5, :])
特征选择
最新推荐文章于 2022-04-09 22:25:02 发布