import numpy as np
import random
def sigmoid(x):
    # Logistic sigmoid: 1 / (1 + e^(-x))
    return 1.0/(1.0+np.exp(-x))
def sigmoid_daoshu(x):
    # Derivative of the sigmoid: sigma'(x) = sigma(x)*(1 - sigma(x))
    return sigmoid(x)*(1-sigmoid(x))
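# Note: sigmoid_daoshu ("daoshu" = derivative) is defined for completeness but not
# called below; backprop computes the same factor directly from the cached
# activations as a*(1-a).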
# Plain NumPy implementation; no nn.Module needed
class MLP:
    def __init__(self, sizes):
        # e.g. sizes = [784, 30, 10]
        self.sizes = sizes
        # Number of layers; backprop uses it to walk the layers from the back
        self.num_layers = len(sizes)
        # This zip trick for pairing adjacent layer sizes is worth learning!
        # weight shapes: [[30, 784], [10, 30]]
        self.w = [np.random.randn(b,a) for a,b in zip(sizes[:-1],sizes[1:])]
        # bias shapes: [[30, 1], [10, 1]]
        self.b = [np.random.randn(a,1) for a in sizes[1:]]
    # Forward pass: returns the output-layer prediction y_pred
    def forward(self, x):
        for w,b in zip(self.w,self.b):
            # [30, 784] @ [784, 1] + [30, 1] = [30, 1]
            # [10, 30] @ [30, 1] + [10, 1] = [10, 1]
            z = np.dot(w,x) + b
            x = sigmoid(z)
        return x
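    # A minimal usage sketch (MNIST-style shapes assumed; not part of the original):
    #   net = MLP([784, 30, 10])
    #   y_pred = net.forward(x)   # x: [784, 1] column vector -> y_pred: [10, 1]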
    # Backprop: returns the gradients of w and b, plus the loss
    def backprop(self,x,y):
        # nabla: the vector differential (gradient) operator
        # [[30, 784], [10, 30]]
        nabla_w = [np.zeros(w.shape) for w in self.w]
        # [[30, 1], [10, 1]]
        nabla_b = [np.zeros(b.shape) for b in self.b]
        # 1. Forward pass: record every layer's result and the output-layer loss
        # Store each layer's activations
        activations = [x]
        activation = x
        # Store each layer's pre-activation z
        zs = []
        for w,b in zip(self.w,self.b):
            z = np.dot(w,activation) + b
            activation = sigmoid(z)
            activations.append(activation)
            zs.append(z)
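        # Sum-of-squared-errors loss over the output layer (no 1/2 factor)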
        loss = np.power(activations[-1]-y, 2).sum()
        # 2. Backward pass
        # 2.1 Output-layer gradient
        # Δ = Ok(1-Ok)(Ok-tk)
        delta = activations[-1]*(1-activations[-1])*(activations[-1]-y)
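        # Chain rule check: with L = 0.5*||a - y||^2 and a = sigmoid(z),
        # dL/dz = (a - y)*a*(1 - a); the loss computed above omits the 1/2
        # factor, which only rescales the gradient by a constant.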
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta,activations[-2].T)
        # 2.2 Hidden-layer gradients, propagated from back to front
        for l in range(2,self.num_layers):
            # Use negative indices to address layers from the back
            l = -l
            z = zs[l]
            a = activations[l]
            delta = np.dot(self.w[l+1].T, delta)*a*(1-a)
            nabla_b[l] = delta
            nabla_w[l] = np.dot(delta, activations[l-1].T)
        return nabla_w, nabla_b, loss
    # Mini-batch gradient descent training loop
    def train(self, training_data, epochs, batchsz, lr, test_data):
        if test_data:
            n_test = len(test_data)
        n = len(training_data)
        for i in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[k:k+batchsz] for k in range(0,n,batchsz)]
            # Update w and b from the average gradient of each mini-batch
            for mini_batch in mini_batches:
                loss = self.update_mini_batch(mini_batch,lr)
                print('loss:',loss)
            if test_data:
                print('Epoch {}: {}/{}, loss: {}'.format(i, self.evaluate(test_data), n_test, loss))
            else:
                print('Epoch {} complete'.format(i))
    def update_mini_batch(self, mini_batch, lr): # Update w and b, return the loss