Next, let's look at the more complete code.
This is train_neuralnet:
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000
train_size = x_train.shape[0]  # 60000
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)  # one epoch is 600 iterations

for i in range(iters_num):  # 10000
    # sample a random mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # compute gradients by numerical differentiation (very slow)
    grad = network.numerical_gradient(x_batch, t_batch)

    # update parameters by gradient descent
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # evaluate once per epoch
    if i % iter_per_epoch == 0:  # i % 600
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train accuracy and test accuracy:", str(train_acc), " ", str(test_acc))
This is the two-layer network class, two_layer_net:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.functions import *
from common.gradient import numerical_gradient

class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # weights start as small Gaussian noise, biases as zeros
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y

    def loss(self, x, t):
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        # loss_W ignores its argument: calling f(W) inside common.gradient
        # simply re-evaluates the current loss with the perturbed weights
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads
"""
这里还没学到,先不敲了
def gradient(self,x,t):
W1,W2 = self.params['W1'],self.params['W2']
b1,b2 = self.params['b1'],self.params['b2']
grads = {}
batch_num = x.shape[0]
a1 = np.dot(x,W1)+b1
z1 = sigmoid(a1)
a2 = np.dot(z1,W2)+b2
y = softmax(a2)
#backward
dy = (y-t)/batch_num
grads['W2'] =
"""
This is gradient (common/gradient.py):
# coding: utf-8
import numpy as np

def _numerical_gradient_1d(f, x):
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x)  # f(x+h)

        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x-h)

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value

    return grad

def numerical_gradient_2d(f, X):
    if X.ndim == 1:
        return _numerical_gradient_1d(f, X)
    else:
        grad = np.zeros_like(X)
        for idx, x in enumerate(X):
            grad[idx] = _numerical_gradient_1d(f, x)
        return grad

def numerical_gradient(f, x):  # f is the loss function, x is the weight array W
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h  # W is modified in place here
        fxh1 = f(x)  # f(x+h)
        # f(W) here is really loss(x, t).
        # When a dict, list, or ndarray is passed into a function, rebinding the
        # parameter does not affect the caller's object, but mutating it in place does:
        # https://blog.csdn.net/liuxiao214/article/details/81673093
        # loss(x, t) calls predict, which multiplies the modified W with x.

        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2 * h)

        x[idx] = tmp_val  # restore the original value
        it.iternext()

    return grad
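As a quick sanity check (my example, not part of the original file): for f(x) = x0^2 + x1^2 the analytic gradient at (3.0, 4.0) is (6, 8), and numerical_gradient should reproduce it to within about h^2:

# sanity check on a simple quadratic function
def function_2(x):
    return x[0]**2 + x[1]**2

print(numerical_gradient(function_2, np.array([3.0, 4.0])))
# expect approximately [6. 8.]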
Most of what's used is above; load_mnist lives in mnist.py.
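For orientation, the shapes returned by load_mnist with the flags used above (a small sketch I added; the shapes follow from MNIST's 60000/10000 split with flatten=True and one_hot_label=True):

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=True, one_hot_label=True)
print(x_train.shape)  # (60000, 784)
print(t_train.shape)  # (60000, 10)
print(x_test.shape)   # (10000, 784)
print(t_test.shape)   # (10000, 10)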