python sklearn 数据集划分与交叉验证
下载地址
链接: https://pan.baidu.com/s/1aB5B_n9HyHEiCuPCgva2-g 提取码: 48ny
"""
数据集划分:将数据集划分为训练集与测试集
交叉验证:将训练集分成若干份(折),轮流以其中一份作为验证集、其余作为训练集,并分别求出每一折的得分
"""
import numpy as np
import sklearn.naive_bayes as nb
import matplotlib.pyplot as mp
import sklearn.model_selection as ms
# Load the samples from a comma-separated text file (one sample per row).
data = np.loadtxt("./multiple1.txt", delimiter=",")
x = data[:, :2].astype("float")  # first two columns are used as the features
y = data[:, -1].astype("float")  # last column is used as the class label
print(x.shape, x.dtype)
print(y.shape, y.dtype)

# Split into training and test sets: 25% of the samples are held out for
# testing, and random_state pins the shuffle so the split is reproducible.
train_x, test_x, train_y, test_y = ms.train_test_split(
    x, y, test_size=0.25, random_state=7)

# Build the Gaussian naive Bayes model.
model = nb.GaussianNB()

# 5-fold cross-validation on the training set under four scoring metrics.
# cross_validate evaluates every metric on the same fitted folds (5 fits in
# total) instead of re-fitting once per metric as four separate
# cross_val_score calls would (20 fits). The printed values are the mean
# per-fold scores, in the order: accuracy, weighted precision,
# weighted recall, weighted F1.
scoring = ["accuracy", "precision_weighted", "recall_weighted", "f1_weighted"]
cv_scores = ms.cross_validate(model, train_x, train_y, cv=5, scoring=scoring)
for metric in scoring:
    # Multi-metric results are stored under the key "test_<metric name>".
    print(cv_scores["test_" + metric].mean())

# Fit on the whole training set, then report the accuracy on the held-out
# test set: the fraction of test samples whose prediction matches the label.
model.fit(train_x, train_y)
prd_test_y = model.predict(test_x)
acc = np.sum(prd_test_y == test_y) / test_y.size
print(acc)

# Prepare the decision-boundary data. The plotted region is the range of the
# two features over all samples, padded by 1 on every side.
left, right = x[:, 0].min() - 1, x[:, 0].max() + 1
bottom, top = x[:, 1].min() - 1, x[:, 1].max() + 1
# Sample the visible region on a 500 x 500 grid and classify every grid point.
n = 500
grid_x, grid_y = np.meshgrid(np.linspace(left, right, n), np.linspace(bottom, top, n))
# Flatten the grid into an (n * n, 2) array of points so the model can
# predict them in one call, then fold the predictions back into grid shape.
mesh_x = np.column_stack((grid_x.ravel(), grid_y.ravel()))
mesh_z = model.predict(mesh_x)
grid_z = mesh_z.reshape(grid_x.shape)

# Draw the predicted class of each grid point as the background, then overlay
# the test samples colored by their true labels.
mp.figure('Naive Bayes Classification', facecolor='lightgray')
mp.title('Naive Bayes Classification', fontsize=20)
mp.xlabel('x', fontsize=14)
mp.ylabel('y', fontsize=14)
mp.tick_params(labelsize=10)
mp.pcolormesh(grid_x, grid_y, grid_z, cmap='gray')
mp.scatter(test_x[:, 0], test_x[:, 1], c=test_y, cmap='brg', s=80)
mp.show()