首先导入需要用到的包
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import roc_curve,auc
读取人类嘴型数据,并查看数据格式
# Load the data set from the working directory. header=None makes pandas
# treat every row as data and label the columns with integers 0..N-1.
df = pd.read_csv(filepath_or_buffer="training.csv", header=None)

# Print the column dtypes, non-null counts and memory usage for a quick
# look at the data layout.
df.info()
列索引为6的一列(索引从0开始计数,即第七列)为标签:标签为1表示张嘴,标签为-1表示嘴没有动。将标签从数据集中分离
# Column label 6 carries the class label (per the accompanying notes:
# 1 = mouth open, -1 = mouth not moving). Copy it out as a NumPy array ...
target = np.array(df.loc[:, 6])

# ... and keep only the remaining columns as the feature matrix.
df = df.drop(labels=6, axis="columns")
将数据集分为训练集和测试集
# Split features and labels in a single call so that every feature row is
# guaranteed to stay paired with its label. (The previous version split the
# two arrays in separate calls and relied on both using the same
# random_state to produce the same shuffle.) 70% train / 30% test,
# reproducible via random_state=2.
train_data, test_data, train_target, test_target = train_test_split(
    df,
    target,
    random_state=2,
    train_size=0.7,
    test_size=0.3,
)
网格搜索,找到最优参数。kfold为3折交叉验证;由于电脑性能问题,未将kfold传入GridSearchCV,因此GridSearchCV使用其默认的交叉验证设置
# Base estimator for the search: an RBF-kernel SVM. probability=True is kept
# from the original setup; it is not needed for the default accuracy scoring.
model = svm.SVC(kernel='rbf', probability=True)

# NOTE: cv is left at GridSearchCV's default. To use the 3-fold scheme
# described in the notes instead, pass
#   cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=7)
#
# refit must be the boolean True. Passing the builtin type `bool` (as before)
# is accepted as a *callable* refit strategy: scikit-learn calls
# bool(cv_results_), gets True (== 1), and therefore always reports the
# second parameter candidate as "best", ignoring the CV scores entirely.
grid_search = GridSearchCV(
    model,
    {"C": [0.1, 1, 10], "gamma": [0.1, 0.2, 0.3]},
    n_jobs=-1,  # use all available CPU cores
    refit=True,
)

# fit() returns the fitted search object itself; grid_result is an alias.
grid_result = grid_search.fit(train_data, train_target)

# Parameter combination with the best mean cross-validated score.
print(grid_search.best_params_)
利用最优参数进行预测
from sklearn.metrics import classification_report

# Build the final RBF-kernel SVM from whatever the grid search selected,
# instead of hand-copying C / gamma from the printed output (which silently
# goes stale whenever the data or the parameter grid changes).
classifier = svm.SVC(kernel='rbf', **grid_search.best_params_)

# fit() returns the estimator itself, so clf and classifier are the same
# fitted object.
clf = classifier.fit(train_data, train_target)

# Hard class predictions on the training samples.
# NOTE(review): these are in-sample predictions; use test_data for an
# unbiased estimate of generalisation performance.
predict_y = classifier.predict(train_data)
绘制ROC曲线
# Build the ROC curve from continuous decision scores on the held-out split.
# - Hard -1/+1 predictions yield only a single operating point (a 3-point
#   "curve"); roc_curve needs a ranking score to sweep the threshold.
# - Scoring the training samples gives an optimistic, in-sample curve, so the
#   test split is used here instead.
FPR, TPR, threshold = roc_curve(
    test_target,
    classifier.decision_function(test_data),
    pos_label=1,  # label 1 is the positive class
)
AUC = auc(FPR, TPR)

plt.figure()
plt.title('ROC CURVE (AUC={:.2f})'.format(AUC))
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.plot(FPR, TPR, color='g')
# Diagonal reference line: the expected ROC of a random classifier.
plt.plot([0, 1], [0, 1], color='m', linestyle='--')
plt.show()