获取线性支持向量机LinearSVC模型的重要特征
模型训练之后,想要得到比较重要的特征,可以通过 Python 的 sklearn 包来实现:线性模型(如 LinearSVC)训练完成后,其 coef_ 属性保存了每个特征的权重,权重绝对值越大,说明该特征越重要。
python实现代码如下所示:
LinearSVC.py
# -*- coding: utf-8 -*-
import IOUtil as iou
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn import metrics
import csv
def report():
    """Print evaluation metrics for the current predictions.

    Reads the module-level ``y_test`` and ``y_predict`` and prints, in
    order, the classification report and the confusion matrix computed
    from them by ``sklearn.metrics``.
    """
    summary_text = metrics.classification_report(y_test, y_predict)
    confusion = metrics.confusion_matrix(y_test, y_predict)
    # Both metrics are computed before anything is printed, so a failure
    # in either one leaves stdout untouched.
    for item in (summary_text, confusion):
        print(item)
def result():
    """Save the model's decision-function scores for the test set.

    Reads the module-level ``model`` and ``X_test``, prints the type of
    the score object, and writes the scores to ``LinearSVC.csv`` as
    comma-separated text.
    """
    scores = model.decision_function(X_test)
    print(type(scores))
    np.savetxt(fname='LinearSVC.csv', X=scores, delimiter=',')
#### Training
print ("LinearSVC Start")
X = iou.readArray("X_train.csv")
y = iou.readArray("y_train.csv")
# Flatten the labels to a 1-D vector before fitting.
y = y.ravel()
print ("LinearSVC Data OK")
# NOTE(review): AdaBoostClassifier is no longer used below; the import is
# retained only so that nothing else relying on the name breaks.
from sklearn.ensemble import AdaBoostClassifier
# The script previously built an AdaBoostClassifier, which has no ``coef_``
# attribute, so reading ``model.coef_`` raised AttributeError. A linear SVM
# is what the log messages describe and what exposes ``coef_``.
model = svm.LinearSVC()
model.fit(X, y)
# Learned per-feature weights of the linear model (the "important features").
n = model.coef_
print(n)
###### Prediction
X_test = iou.readArray("X_test.csv")
# Flattened the same way as the training labels, for consistency.
y_test = iou.readArray("y_test.csv").ravel()
print ("LinearSVC predict...")
y_predict = model.predict(X_test)
result()
report()
print ("LinearSVC Done!!!!!!")
读取文件的代码如下所示:
IOUtil.py
import csv
import numpy as np
from sklearn.externals import joblib
def readArray(file):
    """Load a comma-separated file of numbers into a NumPy array.

    Args:
        file: Path of the CSV file to read. Fields are separated by ``,``
            and may be quoted with single quotes.

    Returns:
        A ``numpy.ndarray`` of floats with one row per non-blank line of
        the file. An empty file yields an empty array.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
        OSError: If the file cannot be opened.
    """
    # The context manager closes the handle; the original left it open.
    with open(file) as handle:
        reader = csv.reader(handle, delimiter=',', quotechar='\'')
        # csv.reader yields [] for blank lines; skipping them avoids an
        # empty row that would make the resulting array ragged.
        rows = [[float(x) for x in row] for row in reader if row]
    return np.array(rows)