# Feature selection with a random forest: print the importance of each feature.
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
import numpy as np
# Load the Boston housing dataset as an example.
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this script needs an older scikit-learn release to run.
boston = load_boston()
X = boston["data"]
Y = boston["target"]
names = boston["feature_names"]

# Seed the forest so the fitted model, and therefore the printed ranking,
# is reproducible from run to run.
rf = RandomForestRegressor(random_state=0)
rf.fit(X, Y)

# Pair each importance (rounded to 4 decimals) with its feature name.
# float()/str() convert the NumPy scalars to plain Python values so the
# printed tuples contain ordinary builtins.
scores = [
    (round(float(importance), 4), str(name))
    for importance, name in zip(rf.feature_importances_, names)
]

print("Features sorted by their score:")
# Tuples compare on the score first, so reverse=True puts the most
# important feature at the front of the list.
print(sorted(scores, reverse=True))
# Sample output from one run (exact values depend on the scikit-learn
# version and on the random seed):
# Features sorted by their score:
# [(0.46600000000000003, 'LSTAT'), (0.34060000000000001, 'RM'), (0.069000000000000006, 'DIS'), (0.042200000000000001, 'CRIM'), (0.0207, 'NOX'), (0.0143, 'AGE'), (0.012500000000000001, 'TAX'), (0.0124, 'B'), (0.010500000000000001, 'PTRATIO'), (0.0080000000000000002, 'INDUS'), (0.0025000000000000001, 'RAD'), (0.001, 'ZN'), (0.00040000000000000002, 'CHAS')]