纯torch实现BP神经网络,使用cpu,主要对torch的基本操作有个了解.
模型说明
直接读取本地文件获得数据集
单隐层神经网络,可以调整隐层神经元个数
激活函数:sigmoid
更新模型参数的原理可以参考西瓜书
初始化模型参数权重:参数正态分布
没有考虑偏置,西瓜书里面的阈值
不足
模型构建得比较生硬,就是单纯地把numpy里面的一些函数换成torch的等价写法;后面会把数据集转换成batch进行训练,并可以选择以GPU或CPU训练
import torch
from torchvision.datasets import mnist
import matplotlib.pyplot as plt
import os
# Activation function
def sigmoid(x):
    """Element-wise logistic sigmoid: 1 / (1 + e^(-x))."""
    return torch.reciprocal(1 + torch.exp(-x))
# Display a random selection of dataset images with their labels
def showDataSet(dataSet, dataLabel):
    """Plot 16 randomly chosen samples from the dataset in a 4x4 grid.

    Each cell shows one image (binary colormap) with its label as the
    x-axis label; ticks and grid are suppressed.
    """
    plt.figure(figsize=(12, 12))
    for slot in range(1, 17):
        pick = torch.randint(0, len(dataSet), (1,)).item()
        ax = plt.subplot(4, 4, slot)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.grid(False)
        ax.imshow(dataSet[pick], cmap=plt.cm.binary)
        ax.set_xlabel(dataLabel[pick].item())
    plt.show()
# The next two functions follow the loading helpers inside
# torchvision.datasets.mnist.
# Reads an MNIST idx binary file; returned pixel values are in the raw
# 0-255 range (no normalisation).
def read_from_file(path: str) -> torch.Tensor:
    """Read one MNIST idx file from disk and return it as a uint8 tensor.

    Raises ValueError if the decoded tensor is not uint8, which would
    indicate a corrupted or mismatched file.
    """
    with open(path, 'rb') as f:
        x = mnist.read_sn3_pascalvincent_tensor(f, strict=False)
    # Raise instead of assert: asserts are silently stripped under `python -O`.
    if x.dtype != torch.uint8:
        raise ValueError(f'expected uint8 data in {path!r}, got {x.dtype}')
    return x
# Returns two lists: index 0 = train, 1 = test; inside each, 0 = images, 1 = labels
def load_torch_data(path='./data/MNIST/raw'):
    """Load the raw MNIST idx files from *path*.

    Returns (training_set, test_set); each is a two-element list of
    [image tensor, label tensor].
    """
    def _pair(images_name, labels_name):
        # Read one images/labels file pair from the data directory.
        return [read_from_file(os.path.join(path, images_name)),
                read_from_file(os.path.join(path, labels_name))]

    training_set = _pair('train-images-idx3-ubyte', 'train-labels-idx1-ubyte')
    test_set = _pair('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte')
    return training_set, test_set
# TODO: also track training-set accuracy
# Model implementation
def torch_mnist(trainSet, testSet, iterate=5000, learning_rate=0.5, hiden_layer=300):
    """Train a single-hidden-layer BP network on MNIST with raw tensor ops.

    trainSet / testSet: [images, labels] where images is (N, 784) floats in
    [0, 1] and labels is (N,) integer class ids.
    iterate: number of single-sample SGD steps.
    learning_rate: step size applied to both weight matrices.
    hiden_layer: number of hidden units.

    No bias terms are used (cf. the thresholds in the watermelon book);
    the update rule follows the BP derivation there. Prints test accuracy
    every 1000 steps and plots the accuracy history at the end.
    """
    # Distinct class labels; the output layer has one unit per class.
    classes = testSet[1].unique()
    # Input / output layer sizes.
    input_layer = len(trainSet[0][0])
    out_layer = len(classes)
    # One-hot encoding table for the targets: rows of an identity matrix.
    label_code = torch.eye(len(classes))
    # Normal-distributed initial weights, scaled down so the sigmoids
    # start in their non-saturated range.
    hiden_Weight = torch.randn(hiden_layer, input_layer)/torch.sqrt(torch.tensor(hiden_layer/2.0))
    out_Weight = torch.randn(out_layer, hiden_layer)/torch.sqrt(torch.tensor(out_layer/2.0))
    # Test-set accuracy history, stored as plain floats (not 0-dim tensors)
    # so max()/plotting need no further conversion.
    accuracys = []
    for n in range(iterate):
        # Pick one random training sample per step (plain per-sample SGD;
        # mini-batches would be a natural extension).
        index = torch.randint(0, len(trainSet[0]), (1,))
        xinput = trainSet[0][index]
        # One-hot target vector for this sample.
        yout = label_code[trainSet[1][index].item()]
        # Forward pass: hidden activations, then network output.
        hiden = sigmoid(torch.mm(xinput, hiden_Weight.t()))
        out = sigmoid(torch.mm(hiden, out_Weight.t()))
        # Backward pass: squared-error gradients through the sigmoids
        # (g_j for the output layer, e_h back-propagated to the hidden layer).
        g_j = out*(1 - out)*(yout - out)
        e_h = hiden*(1 - hiden)*(torch.mm(g_j, out_Weight))
        # Gradient-ascent form of the update (error term already signed).
        hiden_Weight += learning_rate*e_h.reshape(-1, 1)*xinput
        out_Weight += learning_rate*g_j.reshape(-1, 1)*hiden
        # Every 1000 steps, evaluate accuracy over the whole test set.
        if n % 1000 == 0:
            pre_hide = sigmoid(torch.mm(testSet[0], hiden_Weight.t()))
            pr_out = sigmoid(torch.mm(pre_hide, out_Weight.t()))
            # `dim=` is torch's native keyword (axis= is a numpy-compat alias).
            predictions = torch.argmax(pr_out, dim=1)
            acc = torch.mean(torch.eq(predictions, testSet[1]).float()).item()
            accuracys.append(acc)
            print('steps:', n, 'accuracy:', acc)
    print('max accuracy: ', max(accuracys))
    plt.plot(accuracys)
    plt.show()
# Load the MNIST training and test sets.
trainSet, testSet = load_torch_data()
# Show a random sample of images with their labels.
showDataSet(trainSet[0], trainSet[1])
# Flatten each 28x28 image to a 784-vector and scale pixels to [0, 1].
# -1 lets torch infer the sample count instead of hard-coding 60000/10000.
trainSet[0] = trainSet[0].reshape(-1, 28*28)/255.0
testSet[0] = testSet[0].reshape(-1, 28*28)/255.0
# Re-seed the global RNG non-deterministically before training
# (use torch.manual_seed(k) here instead for reproducible runs).
torch.random.seed()
# Train the model.
torch_mnist(trainSet, testSet, iterate=50000, learning_rate=0.5, hiden_layer=300)
运行结果: