# File download address:
# Link: https://pan.baidu.com/s/1aa4dlvZLity-H6D87BozuA  extraction code: y3db
"""
网格搜索 通过不同参数组合尝试训练网络
获得最优参数、最优模型
"""
import numpy as np
import sklearn.naive_bayes as nb
import matplotlib.pyplot as mp
import sklearn.svm as svm
import sklearn.model_selection as ms
import sklearn.metrics as sm
# Load the comma-separated samples: the first two columns are used as the
# input features and the last column as the class label.
data = np.loadtxt("./multiple2.txt", delimiter=",")
x, y = data[:, :2].astype("float"), data[:, -1].astype("float")
print(x.shape, x.dtype)
print(y.shape, y.dtype)
# Hold out 25% of the samples for testing; the fixed random_state keeps the
# split identical from run to run.
train_x, test_x, train_y, test_y = ms.train_test_split(
    x, y, test_size=0.25, random_state=7
)
# Candidate hyper-parameter grids, one dict per kernel family.
params = [
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
    {'kernel': ['poly'], 'C': [1], 'degree': [2, 3]},
    {'kernel': ['rbf'], 'C': [1, 10, 100, 1000],
     'gamma': [1, 0.1, 0.01, 0.001]},
]
# Grid search with 5-fold cross-validation to pick the best hyper-parameters.
# probability=True makes the SVC keep confidence probabilities, which the
# predict_proba call later in this script relies on.
model = ms.GridSearchCV(svm.SVC(probability=True), params, cv=5)
model.fit(train_x, train_y)
print(model.best_params_)  # best parameter combination
print(model.best_score_)  # best score
print(model.best_estimator_)
# Print the mean cross-validation score of every parameter combination.
for p, s in zip(model.cv_results_["params"],
                model.cv_results_["mean_test_score"]):
    print(p, s)
# Evaluate the tuned model on the held-out test samples.
prd_test_y = model.predict(test_x)
print(sm.classification_report(test_y, prd_test_y))
# Extent of the decision-boundary plot: the bounding box of the samples,
# padded by one unit on every side (column 0 is horizontal, column 1 vertical).
left, bottom = x.min(axis=0) - 1
right, top = x.max(axis=0) + 1
# Sample the visible area on a 500 x 500 grid.
n = 500
grid_x, grid_y = np.meshgrid(
    np.linspace(left, right, n),
    np.linspace(bottom, top, n),
)
# Flatten the grid into one (x, y) row per grid point, classify every point,
# then fold the predictions back into the grid's shape for plotting.
mesh_x = np.column_stack((grid_x.ravel(), grid_y.ravel()))
mesh_z = model.predict(mesh_x)
grid_z = mesh_z.reshape(grid_x.shape)
# Draw the predicted class regions and the test samples on top of them.
mp.figure('SVM probability Classification', facecolor='lightgray')
mp.title('SVM probability Classification', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.pcolormesh(grid_x, grid_y, grid_z, cmap='gray')
mp.scatter(test_x[:, 0], test_x[:, 1], c=test_y, cmap='brg', s=80)
# Additional points for which the confidence probabilities are reported.
prob_x = np.array([
    [2, 1.5],
    [8, 9],
    [4.8, 5.2],
    [4, 4],
    [2.5, 7],
    [7.6, 2],
    [5.4, 5.9]])
# Predict the class of the new points and draw them as diamonds.
prd_prob_y = model.predict(prob_x)
mp.scatter(prob_x[:, 0], prob_x[:, 1], marker="D", c=prd_prob_y,
           cmap='jet_r', s=80)
# Confidence probabilities of the new points.
# NOTE(review): per the scikit-learn SVC documentation, predict_proba may be
# inconsistent with predict, so a label's colour and its percentages can
# disagree for points close to the boundary.
prob_data = model.predict_proba(prob_x)
print(prob_data)
# Annotate every new point with the probabilities in the first two columns
# of its row (assumes a two-class problem -- TODO confirm against the data).
for point, probs in zip(prob_x, prob_data):
    mp.annotate(
        '{}% {}%'.format(
            round(probs[0] * 100, 2),
            round(probs[1] * 100, 2)),
        xy=(point[0], point[1]),
        xytext=(12, -12),
        textcoords='offset points',
        horizontalalignment='left',
        verticalalignment='top',
        fontsize=9,
        bbox={'boxstyle': 'round,pad=0.6',
              'fc': 'orange', 'alpha': 0.8})
mp.show()