代码实现:
import numpy as np
import pandas as pd
import sklearn.svm as svm
import sklearn.model_selection as ms
import sklearn.metrics as sm
import matplotlib.pyplot as plt
# Hyper-parameter search space for the SVM: one sub-grid per kernel, each
# listing only the options that are searched for that kernel.
linear_grid = {
    'kernel': ['linear'],
    'C': [1, 10, 100, 1000],
}
poly_grid = {
    'kernel': ['poly'],
    'C': [1],
    'degree': [2, 3],
}
rbf_grid = {
    'kernel': ['rbf'],
    'C': [1, 10, 100, 900, 1000],
    'gamma': [1, 0.1, 0.01, 0.001],
}
params = [linear_grid, poly_grid, rbf_grid]
# Load the data set. The last column holds the class label; every column
# before it is used as a feature.
# NOTE(review): read_csv treats the first line of the file as a header row by
# default -- confirm the file has one, otherwise pass header=None.
data = pd.read_csv('C:/Users/81936/Desktop/balance.txt', delimiter=",")
data = np.array(data)
data1 = data[:, :-1]  # feature columns
data2 = data[:, -1]  # raw class labels as read from the file

# Integer code assigned to each class label (after whitespace is stripped).
_LABEL_CODES = {'R': 1, 'B': 2, 'L': 3}


def _encode_label(raw_label):
    """Return the integer code for one raw class label.

    Surrounding whitespace is ignored, so both ' R' and 'R' map to 1.

    Raises:
        ValueError: if the label is not one of the known classes. Failing
            here keeps the label vector aligned with the feature rows instead
            of silently dropping the sample.
    """
    key = str(raw_label).strip()
    try:
        return _LABEL_CODES[key]
    except KeyError:
        raise ValueError(f"unknown class label: {raw_label!r}") from None


data3 = np.array([_encode_label(label) for label in data2])
# Re-attach the encoded labels as the last column, then split into the
# numeric feature matrix x and the target vector y.
data = np.column_stack((data1, data3))
x = np.array(data[:, :-1], dtype=float)
y = np.array(data[:, -1], dtype=float)
# Split the samples into a training set and a test set with scikit-learn.
# x and y are the inputs and targets, test_size is the fraction of samples
# placed in the test set, and random_state is the random seed (a fixed seed
# gives the same split on every run).
split_parts = ms.train_test_split(x, y, test_size=0.25, random_state=7)
train_x, test_x, train_y, test_y = split_parts
# Base SVM classifier; probability=True makes class-membership probability
# estimates available from the fitted model.
base_svc = svm.SVC(probability=True)
# Grid search for the best hyper-parameters: every combination in `params`
# is evaluated with 5-fold cross-validation (cv=5).
model = ms.GridSearchCV(estimator=base_svc, param_grid=params, cv=5)
model.fit(train_x, train_y)
# Evaluate the tuned model on the held-out test set.
pred_test_y = model.predict(test_x)
# Fraction of test samples whose predicted label equals the true label.
acc = sm.accuracy_score(test_y, pred_test_y)
print(acc)
print(model.best_params_)  # hyper-parameter combination with the best mean CV score
# The grid search is built without a `scoring` argument, so this is the mean
# cross-validated score from the estimator's own score method (accuracy for
# SVC), not an F1 score.
print(model.best_score_)
print(model.best_estimator_)  # estimator configured with the best parameters
out:
{'C': 1000, 'gamma': 0.01, 'kernel': 'rbf'}
0.9808053077099063
SVC(C=1000, gamma=0.01, probability=True)
# Print every hyper-parameter combination that was tried next to its mean
# cross-validation test score.
candidate_params = model.cv_results_['params']
mean_scores = model.cv_results_['mean_test_score']
for candidate, mean_score in zip(candidate_params, mean_scores):
    print(candidate, mean_score)
out:
{'C': 1, 'kernel': 'linear'} 0.8932052161976664
{'C': 10, 'kernel': 'linear'} 0.9017158544955388
{'C': 100, 'kernel': 'linear'} 0.9017158544955388
{'C': 1000, 'kernel': 'linear'} 0.9017158544955388
{'C': 1, 'degree': 2, 'kernel': 'poly'} 0.9487531457332418
{'C': 1, 'degree': 3, 'kernel': 'poly'} 0.9787005261953787
{'C': 1, 'gamma': 1, 'kernel': 'rbf'} 0.893228094257607
{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'} 0.9209563029055137
{'C': 1, 'gamma': 0.01, 'kernel': 'rbf'} 0.9145504461221687
{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'} 0.8889270189887897
{'C': 10, 'gamma': 1, 'kernel': 'rbf'} 0.8312056737588653
{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'} 0.9423701670098377
{'C': 10, 'gamma': 0.01, 'kernel': 'rbf'} 0.9209563029055137
{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'} 0.903889270189888
{'C': 100, 'gamma': 1, 'kernel': 'rbf'} 0.8312056737588653
{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'} 0.9295355753832075
{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'} 0.9466026080988332
{'C': 100, 'gamma': 0.001, 'kernel': 'rbf'} 0.9209563029055137
{'C': 900, 'gamma': 1, 'kernel': 'rbf'} 0.8312056737588653
{'C': 900, 'gamma': 0.1, 'kernel': 'rbf'} 0.9295355753832075
{'C': 900, 'gamma': 0.01, 'kernel': 'rbf'} 0.9786776481354382
{'C': 900, 'gamma': 0.001, 'kernel': 'rbf'} 0.9038435140700068
{'C': 1000, 'gamma': 1, 'kernel': 'rbf'} 0.8312056737588653
{'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'} 0.9295355753832075
{'C': 1000, 'gamma': 0.01, 'kernel': 'rbf'} 0.9808053077099063
{'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'} 0.9038435140700068