获取AdaBoost模型的重要特征
模型训练完成后,如果想知道哪些特征比较重要,可以读取 sklearn 中 AdaBoostClassifier 的 feature_importances_ 属性。
python实现代码如下所示:
AdaBoostClassifier.py
# -*- coding: utf-8 -*-
import IOUtil as iou
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn import metrics
import csv
def report():
    """Print evaluation metrics for the current predictions.

    Reads the module-level ``y_test`` and ``y_predict`` and prints, in
    order, the text produced by ``metrics.classification_report`` and the
    matrix produced by ``metrics.confusion_matrix``. Returns nothing.
    """
    summary = metrics.classification_report(y_test, y_predict)
    confusion = metrics.confusion_matrix(y_test, y_predict)
    # Both metrics are computed before anything is printed.
    for item in (summary, confusion):
        print(item)
def result():
    """Save the output of ``model.predict_proba(X_test)`` as a CSV file.

    Reads the module-level ``model`` and ``X_test``, wraps the returned
    values in a pandas DataFrame and writes it to
    ``result_AdaBoostClassifier.csv`` in the current working directory.
    Returns nothing.
    """
    probabilities = model.predict_proba(X_test)
    pd.DataFrame(probabilities).to_csv('result_AdaBoostClassifier.csv')
#### Training
# Everything below runs at import time and defines the module-level names
# (model, X_test, y_test, y_predict) that result() and report() read.
print ("AdaBoostClassifier Start")
# iou.readArray parses a CSV file of numbers into a NumPy array of floats.
X = iou.readArray("X_train.csv")
y = iou.readArray("y_train.csv")
# Flatten the labels to 1-D (presumably what fit() expects; confirm for the
# sklearn version in use).
y = y.ravel()
print ("AdaBoostClassifier Data OK")
from sklearn.ensemble import AdaBoostClassifier
# Default hyper-parameters are used.
model = AdaBoostClassifier()
model.fit(X, y)
n=model.feature_importances_# feature importances of the fitted model
print(n)
###### Prediction
X_test = iou.readArray("X_test.csv")
# NOTE(review): unlike y above, y_test is not flattened with ravel().
y_test = iou.readArray("y_test.csv")
print ("AdaBoostClassifier predict...")
y_predict = model.predict(X_test)
# result() writes predict_proba output to CSV; report() prints the metrics.
result()
report()
print ("AdaBoostClassifier Done!!!!!!")
读取文件的代码如下所示:
IOUtil.py
import csv
import numpy as np
from sklearn.externals import joblib
def readArray(file):
    """Load a comma-separated file of numbers into a NumPy array.

    Each non-empty line becomes one row; every field is converted with
    ``float``. Fields may be quoted with single quotes.

    Args:
        file: Path of the CSV file to read.

    Returns:
        ``np.array`` built from the list of float rows. An input without
        any data rows yields an empty array.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
        IOError/OSError: If the file cannot be opened.
    """
    # Use a context manager so the handle is closed even when a field fails
    # to parse; the original left the file open.
    with open(file) as handle:
        reader = csv.reader(handle, delimiter=',', quotechar='\'')
        # csv.reader yields [] for blank lines; skipping them avoids an
        # empty row that would make the resulting array ragged.
        rows = [[float(cell) for cell in row] for row in reader if row]
    return np.array(rows)