Nonlinear SVM (RBF kernel) on the krkopt dataset

# -*- coding: UTF-8 -*-
# team: THE COOLEST
# developer: 
# time: 2022/11/19 9:26 PM
# file name: krkopt_svm.PY
# tools: PyCharm

import pandas as pd
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, auc, roc_curve
import seaborn as sns
import matplotlib.pyplot as plt

# Read the data and drop rows with missing values
data = pd.read_csv('krkopt.data', header = None)
data.dropna(inplace = True)

# Map the letter-valued columns (0, 2, 4) to the numbers 1-8
for i in [0,2,4]:
    data.loc[data[i] == 'a', i] = 1
    data.loc[data[i] == 'b', i] = 2
    data.loc[data[i] == 'c', i] = 3
    data.loc[data[i] == 'd', i] = 4
    data.loc[data[i] == 'e', i] = 5
    data.loc[data[i] == 'f', i] = 6
    data.loc[data[i] == 'g', i] = 7
    data.loc[data[i] == 'h', i] = 8
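
# Alternative (a minimal sketch): the same letter-to-number conversion can be done in
# one pass per column with a mapping dict and Series.map; kept commented out so the
# loop above stays the active version.
# letter_map = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8}
# for i in [0, 2, 4]:
#     data[i] = data[i].map(letter_map)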

# Convert the labels to numbers
# Method 1 (slow): row-by-row loop
# for i in range(data.shape[0]):
#     if data.iloc[i,6] == "draw":
#         data.iloc[i,6] = 1
#     else:
#         data.iloc[i,6] = -1
# Method 2 (fast): vectorized boolean indexing ('draw' -> 1, everything else -> -1)
data.loc[data[6]!='draw', 6] = -1
data.loc[data[6]=='draw', 6] = 1
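
# Alternative (sketch, assuming numpy): the same binarization in one vectorized step.
# Note the two .loc assignments above must run in that order; reversed, the '!= draw'
# mask would also catch the freshly assigned 1s. Kept commented out.
# import numpy as np
# data[6] = np.where(data[6] == 'draw', 1, -1)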

# Standardize each feature column (zero mean, unit variance)
for i in range(6):
    data[i] = (data[i] - data[i].mean()) / data[i].std()
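
# Alternative (sketch): the per-column z-score above can also be done with sklearn's
# StandardScaler, which stores the fitted mean/std for reuse on new data; note it uses
# the population std, so results differ slightly from pandas' sample std. Commented out.
# from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()
# data.iloc[:, :6] = scaler.fit_transform(data.iloc[:, :6])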

# Split into training and test sets (test_size=0.8 puts 80% of the samples in the test set)
X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :6], data[6].astype('int').values, test_size=0.8, random_state=1)

# Coarse grid search for C and gamma, scored by 5-fold cross-validation on the training set
Cscale = [i for i in range(100, 201, 10)]
gammaScale = [i/10 for i in range(1, 11)]
cv_score = 0.0
for i in Cscale:
    for j in gammaScale:
        model = SVC(kernel='rbf', C=i, gamma=j)
        score = cross_val_score(model, X_train, y_train, cv=5, scoring="accuracy")
        if score.mean() > cv_score:
            cv_score = score.mean()
            savei = i
            savej = j*100  # store gamma*100 so the fine search below can step in hundredths

# Fine grid search around the best coarse values
Cscale = [i for i in range(savei-5, savei+5)]
gammaScale = [i/100+0.01 for i in range(int(savej)-5, int(savej)+5)]
savej = savej/100  # fall back to the best coarse gamma in case no fine combination improves on it
for i in Cscale:
    for j in gammaScale:
        model = SVC(kernel='rbf', C=i, gamma=j)
        score = cross_val_score(model, X_train, y_train, cv=5, scoring="accuracy")
        if score.mean() > cv_score:
            cv_score = score.mean()
            savei = i
            savej = j
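
# Equivalent approach (sketch): the two-stage manual search above can also be written
# with sklearn's GridSearchCV, which runs the same cross-validated scoring and keeps
# the best parameters; the ranges below mirror the coarse grid and are illustrative.
# from sklearn.model_selection import GridSearchCV
# param_grid = {'C': list(range(100, 201, 10)), 'gamma': [g / 10 for g in range(1, 11)]}
# grid = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=5, scoring='accuracy')
# grid.fit(X_train, y_train)
# print(grid.best_params_, grid.best_score_)
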
# Rebuild the model with the selected parameters and evaluate it on the test set
model = SVC(kernel='rbf', C=savei, gamma=savej)
model.fit(X_train, y_train)
pre = model.predict(X_test)
print('test accuracy:', model.score(X_test, y_test))

# Plot the confusion matrix, the ROC curve (AUC), and an EER reference line
cm = confusion_matrix(y_test, pre, labels=[-1, 1], sample_weight=None)
f, ax = plt.subplots()
sns.heatmap(cm, annot=True, ax=ax)  # heatmap of the confusion matrix
ax.set_title('confusion matrix')    # title
ax.set_xlabel('predict')            # x axis
ax.set_ylabel('true')               # y axis
fpr, tpr, threshold = roc_curve(y_test, pre)  # false positive rate and true positive rate
roc_auc = auc(fpr, tpr)  # AUC is the area under the ROC curve; larger is better
lw = 2
plt.figure(figsize=(10, 10))
plt.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)  # FPR on the x axis, TPR on the y axis
plt.plot([0, 1], [1, 0], color='navy', lw=lw, linestyle='--')  # anti-diagonal reference line; its crossing with the ROC marks the EER
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()
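
# Note (sketch): roc_curve above is fed the hard class predictions, so the ROC has only
# one interior point; passing continuous scores from decision_function would trace a
# proper curve and a more informative AUC. Kept commented out so the plot is unchanged.
# scores = model.decision_function(X_test)
# fpr2, tpr2, _ = roc_curve(y_test, scores)
# print('AUC from decision scores: %.3f' % auc(fpr2, tpr2))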






# Quick sanity checks on the processed data and the split
print(data)
print(X_test.shape)
print(y_test)



Original post: https://blog.csdn.net/weixin_44389971/article/details/106887694
