1.基于树模型提取特征
# 使用树模型提取特征
import numpy as np
from sklearn import feature_selection
from sklearn.ensemble import GradientBoostingClassifier
matrix = np.array(X)
target = np.array(target)
temp = feature_selection.SelectFromModel(GradientBoostingClassifier()).fit(matrix, target)
indx = temp._get_support_mask().tolist()
scores = get_importance(temp.estimator_).tolist()
result = temp.transform(matrix).tolist()
return scores, indx, result
# X: array-like
# target: array-like
# http://scikit-# learn.org/stable/modules/generated/sklearn.feature_selection.SelectFromModel.html
2.基于L1,L2惩罚值提取特征
1 # 基于L1,L2惩罚值提取特征
2 import numpy as np
3 from sklearn import feature_selection
4 from sklearn.linear_model import LogisticRegression
5
6 matrix = np.array(arr0)
7 target = np.array(target)
8 temp = feature_selection.SelectFromModel(LogisticRegression(penalty="l1", C=0.1)).fit(matrix, target)
9 indx = temp._get_support_mask().tolist()
10 scores = get_importance(temp.estimator_).tolist()
11 result = temp.transform(matrix).tolist()
12 return scores, indx, result
13
14 # X: array-like
15 # target: array-like
16 # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectFromModel.html
3.递归特征消除法提取特征
1 # 递归特征消除法
2 import numpy as np
3 from sklearn import feature_selection
4 from sklearn.linear_model import LogisticRegression
5
6 matrix = np.array(X)
7 target = np.array(target)
8 temp = feature_selection.RFE(estimator=LogisticRegression(), n_features_to_select=n_features).fit(matrix, target)
9 scores = temp.ranking_.tolist()
10 indx = temp.support_.tolist()
11 result = temp.transform(matrix).tolist()
12 return scores, indx, result
13
14 # X: array-like
15 # target: array-like
16 # n-features: int
17 # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFE.html
18
4.互信息选择法提取特征
1 # 互信息选择法
2 from minepy import MINE
3 import numpy as np
4 from sklearn import feature_selection
5
6 matrix = np.array(X)
7 target = np.array(target)
8 def mic(x, y):
9 m = MINE()
10 m.compute_score(x, y)
11 return (m.mic(), 0.5)
12 temp = feature_selection.SelectKBest(lambda X, Y: np.array(list(map(lambda x: mic(x, Y), X.T))).T[0], k=k).fit(matrix, target)
13 scores = temp.scores_.tolist()
14 indx = temp.get_support().tolist()
15 result = temp.transform(matrix).tolist()
16 return scores, indx, result
17
18 # X: array-like
19 # target: array-like
20 # k: int
21 # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html
22
5.利用相关系数选择特征
1 # 利用相关系数选择特征
2 import numpy as np
3 from sklearn import feature_selection
4 from sklearn.feature_selection import chi2
5
6 matrix = np.array(X)
7 target = np.array(target)
8 temp = feature_selection.SelectKBest(lambda X, Y: np.array(list(map(lambda x: abs(pearsonr(x, Y)[0]), X.T))), k=k).fit(matrix, target)
9 scores = temp.scores_.tolist()
10 indx = temp.get_support().tolist()
11 result = temp.transform(matrix).tolist()
12 return scores, indx, result
13
14 # X: array-like
15 # target: array-like
16 # k: int
17 # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html
18
6.卡方检验法提取特征
1 # 卡方检验法提取特征
2 import numpy as np
3 from sklearn import feature_selection
4 from sklearn.feature_selection import chi2
5
6 matrix = np.array(X)
7 target = np.array(target)
8 temp = feature_selection.SelectKBest(chi2, k=k).fit(matrix, target)
9 scores = temp.scores_.tolist()
10 indx = temp.get_support().tolist()
11 result = temp.transform(matrix).tolist()
12 return scores, indx, result
13
14 # X: array-like
15 # target: array-like
16 # k: int
17 # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html
18
7.利用方差选择特征
1 # 利用方差选择特征
2 import numpy as np
3 from sklearn import feature_selection
4
5 matrix = np.array(X)
6 temp = feature_selection.VarianceThreshold(threshold=t).fit(matrix)
7 scores = [np.var(el) for el in matrix.T]
8 indx = temp.get_support().tolist()
9 result = temp.transform(matrix).tolist()
10 return scores, indx, result
11
12 # X: array-like
13 # t: float
14 # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html
15