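# Uses the hand-written Read_File module for reading the MNIST dataset, together with its built-in visualization function.
# For the Read_File module, see https://blog.csdn.net/wangdiedang/article/details/125335812?spm=1001.2014.3001.5502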
# creator : wangdiedang
# time : 2022/6/7 11:57
# filename : Bayes.py
# 引入读文件模块
import Read_File as RF
import numpy as np
from collections import Counter
import time
def featureExtraction(img, dim, num):
    """Down-sample one image to a ``dim`` x ``dim`` grid of 0/1 features.

    Grid cell ``(r, c)`` covers the ``num`` x ``num`` patch of ``img`` whose
    top-left corner is ``(num * r, num * c)``. The cell is set to 1 when the
    sum of the pixel values in that patch is strictly greater than
    ``max(28 // dim - 1, 1)``, and to 0 otherwise.

    Args:
        img: 2-D array of pixel values (presumably a binarised 28 x 28 MNIST
            digit -- the threshold hard-codes 28; confirm against the caller).
        dim: Side length of the output grid.
        num: Side length, in pixels, of the patch behind each grid cell.

    Returns:
        A float array of shape ``(dim, dim)`` containing only 0.0 and 1.0.
    """
    features = np.zeros((dim, dim))
    # np.ndindex walks the grid in row-major order, like two nested loops.
    for r, c in np.ndindex(dim, dim):
        patch = img[num * r:num * (r + 1), num * c:num * (c + 1)]
        # The floor of 1 means a patch needs at least two "on" pixels'
        # worth of intensity before it counts as set.
        if patch.sum() > max(28 // dim - 1, 1):
            features[r, c] = 1
    return features
def Extraction2AllImgs(imgs, dim):
    """Apply ``featureExtraction`` to every image in a stack.

    Args:
        imgs: Array whose first axis indexes images; each ``imgs[k]`` is
            passed to ``featureExtraction`` unchanged.
        dim: Target side length of each reduced image.

    Returns:
        A float array of shape ``(imgs.shape[0], dim, dim)`` holding the
        reduced images in their original order.
    """
    reduced = np.empty((imgs.shape[0], dim, dim))
    # Patch side length; the source image width is hard-coded as 28.
    patch_size = 28 // dim
    # Each ``slot`` is a view into ``reduced``, so writing through it fills
    # the output array in place.
    for slot, img in zip(reduced, imgs):
        slot[...] = featureExtraction(img, dim, patch_size)
    return reduced
def read_data(dim=7):
    """Load the dataset through ``RF`` and optionally shrink the images.

    Calls ``RF.read_main()`` for four values and displays the first one
    with ``RF.show_img``. When ``dim`` is below 28, the first and third
    values (the image stacks) are reduced to ``dim`` x ``dim`` with
    ``Extraction2AllImgs``; the second and fourth are returned untouched.

    Args:
        dim: Target side length of the images. Values of 28 or more leave
            the images as loaded.

    Returns:
        A 4-tuple in the order ``RF.read_main()`` returned it -- the script's
        entry point unpacks it as (train images, train labels, test images,
        test labels).
    """
    train_x, train_y, test_x, test_y = RF.read_main()
    RF.show_img(train_x)
    if dim >= 28:
        # Nothing to reduce: hand the loaded data straight back.
        return train_x, train_y, test_x, test_y
    return (
        Extraction2AllImgs(train_x, dim),
        train_y,
        Extraction2AllImgs(test_x, dim),
        test_y,
    )
def trainsAllImgs(train_imgs, train_labels):
    """Estimate the naive Bayes tables from the training set.

    Args:
        train_imgs: Array of shape ``(total, row, col)``.
        train_labels: Array of ``total`` labels; it is compared element-wise
            against each digit 0-9, so it must be a NumPy array.

    Returns:
        A pair ``(priori_, posterior_)``:

        * ``priori_`` -- shape ``(10,)``; entry ``d`` is the Laplace-smoothed
          share of label ``d``: ``(count_d + 1) / (total + 10)``.
        * ``posterior_`` -- shape ``(10, row * col)``; row ``d`` is
          ``(per-pixel sum over images labelled d + 1) / (count_d + 2)``.
          For 0/1 images this is the smoothed frequency of each pixel
          being 1 given digit ``d``.
    """
    total, row, col = train_imgs.shape
    cnt = Counter(train_labels)

    # Laplace-smoothed class priors (10 classes, hence the "+ 10").
    priori_ = np.array([(cnt[digit] + 1) / (total + 10) for digit in range(10)])

    # One row per image so a boolean label mask selects whole images.
    flat_imgs = train_imgs.reshape(total, row * col)

    posterior_ = np.empty((10, row * col))
    for digit, pixel_probs in enumerate(posterior_):
        on_counts = flat_imgs[train_labels == digit].sum(axis=0)
        # "+ 1" / "+ 2": Laplace smoothing for a two-valued pixel.
        pixel_probs[:] = (on_counts + 1) / (cnt[digit] + 2)
    return priori_, posterior_
def bayesClassifier_MLE(test_imgs, priori_, posterior_):
    """Classify one feature image with a Bernoulli naive Bayes model.

    For every class the score is::

        log(priori_[k]) + sum_i log P(pixel_i = observed value | class k)

    where ``P(pixel_i = 1 | k)`` is ``posterior_[k][i]`` and
    ``P(pixel_i = 0 | k)`` is ``1 - posterior_[k][i]``. Working in log space
    avoids the underflow a product of many small probabilities would cause.

    Args:
        test_imgs: A single image (any shape; it is flattened). A pixel equal
            to 0 is treated as "off", anything else as "on".
        priori_: Per-class prior probabilities, shape ``(n_classes,)``.
        posterior_: Per-class probability of each pixel being on, shape
            ``(n_classes, n_pixels)``.

    Returns:
        The index of the highest-scoring class (first one on ties).
    """
    pixels = np.asarray(test_imgs).reshape(-1)
    priori_ = np.asarray(priori_)
    posterior_ = np.asarray(posterior_)

    # Select, per class and pixel, the probability of the value actually
    # observed. Selecting before taking the log means log() never touches
    # the unused branch.
    observed_probs = np.where(pixels == 0, 1 - posterior_, posterior_)
    log_scores = np.log(priori_) + np.log(observed_probs).sum(axis=1)
    return np.argmax(log_scores)
def evaluate_Bayes_model_MLE(train_imgs, train_labels, test_imgs, test_labels,
                             start_time=None):
    """Train the naive Bayes model, score it on the test set, print a report.

    Args:
        train_imgs: Training images, shape ``(n_train, rows, cols)``.
        train_labels: Labels for ``train_imgs``.
        test_imgs: Test images; each ``test_imgs[i]`` is classified on its own.
        test_labels: Array of expected labels for ``test_imgs``.
        start_time: ``time.time()`` value the reported run time is measured
            from. When omitted, a module-level ``old_time`` is used if one
            exists (this is what the script entry point sets); otherwise the
            moment this function was entered.

    Returns:
        None. The training size, accuracy and elapsed time are printed.
    """
    entered_at = time.time()
    if start_time is None:
        # Fall back to the script's global timer so running the file directly
        # still reports the same span, without raising NameError when this
        # function is imported and called from elsewhere.
        start_time = globals().get("old_time", entered_at)

    # Fit the priors and per-pixel probabilities on the training set.
    print("-----训练模型获得模型的先验和后验概率-----")
    priori_, posterior_ = trainsAllImgs(train_imgs, train_labels)
    print("-----成功获得模型的先验和后验概率-----")

    # Classify every test image and count the hits.
    print("-----将测试集载入模型进行精确度评估-----")
    correctCnt = 0
    for img, label in zip(test_imgs, test_labels):
        if bayesClassifier_MLE(img, priori_, posterior_) == label:
            correctCnt += 1
    current_time = time.time()

    # Report the real training-set shape rather than a fixed 60000 and a
    # global ``dim`` that may not match the data.
    train_total, rows, cols = train_imgs.shape
    test_total = test_labels.shape[0]
    # An empty test set reports 0% instead of dividing by zero.
    accuracy = correctCnt / test_total * 100 if test_total else 0.0

    print("-----模型评估结束-----")
    print("--------------------------------------------------------------")
    print("训练模型样本:%d,样本尺寸(%d, %d)" % (train_total, rows, cols))
    print("总样本: %d, 预测成功数: %d, 预测成功率: %.3f" % (test_total, correctCnt, accuracy) + "%")
    print("运行时间为" + str(current_time - start_time) + "s")
    print("--------------------------------------------------------------")
if __name__ == "__main__":
    # Side length of the feature images; 28 keeps the images as loaded.
    dim = 28
    # ``old_time`` must keep this exact module-level name: the evaluation
    # routine looks it up as a global for its run-time report.
    old_time = time.time()

    print("-----读取数据集-----")
    dataset = read_data(dim)
    train_imgs, train_labels, test_imgs, test_labels = dataset
    # Display the training images as they will be fed to the model.
    RF.show_img(train_imgs)

    print("-----读取成功开始训练-----")
    # Train on the training split and report accuracy on the test split.
    evaluate_Bayes_model_MLE(train_imgs, train_labels, test_imgs, test_labels)