Contents
two_layer_net.ipynb
Importing data and preprocessing
Preparation for running the code that follows
# Make print() available under Python 2.x
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.neural_net import TwoLayerNet
%matplotlib inline
# Nearest-neighbor interpolation for displayed images
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
# Make Jupyter automatically reload imported modules when their source files change
%load_ext autoreload
%autoreload 2
def rel_error(x, y):
    """ Returns the relative error: absolute error divided by the magnitude of the reference values """
    # np.maximum compares two arrays element-wise; a scalar argument is broadcast to match the array,
    # e.g. np.maximum([1, 2, 3], [2, 1, 4]) = [2, 2, 4]
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
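For instance, a quick check of rel_error on two nearby arrays (the values below are purely illustrative):
a = np.array([1.0, 2.0, 3.0])
b = np.array([1.0, 2.0 + 1e-6, 3.0])
print(rel_error(a, b))  # about 2.5e-7: the max of |a - b| / (|a| + |b|)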
Create a small network and a small amount of toy data to check that the code is correct.
Note: a fixed random seed is set here, so the results are reproducible.
# Parameters describing the network architecture
input_size = 4
hidden_size = 10
num_classes = 3
num_inputs = 5
# Initialize a two-layer network
def init_toy_model():
    np.random.seed(0)
    return TwoLayerNet(input_size, hidden_size, num_classes, std=1e-1)
# Construct toy inputs and labels
def init_toy_data():
    np.random.seed(1)
    # Generate a matrix of shape (5, 4)
    X = 10 * np.random.randn(num_inputs, input_size)
    y = np.array([0, 1, 2, 2, 1])
    return X, y
net = init_toy_model()
X, y = init_toy_data()
Verifying the loss and the gradients
Run the code below only after loss() in neural_net.py has been completed.
It checks that the score, loss, and gradient computations are correct.
# When y is not passed in, net.loss() returns the class scores
scores = net.loss(X)
print('Your scores:')
print(scores)
print()
print('correct scores:')
correct_scores = np.asarray([
    [-0.81233741, -1.27654624, -0.70335995],
    [-0.17129677, -1.18803311, -0.47310444],
    [-0.51590475, -1.01354314, -0.8504215 ],
    [-0.15419291, -0.48629638, -0.52901952],
    [-0.00618733, -0.12435261, -0.15226949]])
print(correct_scores)
print()
# The difference between the two sets of scores should be less than 1e-7
print('Difference between your scores and correct scores:')
print(np.sum(np.abs(scores - correct_scores)))
# Difference between your scores and correct scores:
# 3.6802720496109664e-08
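For reference, the scores above come from a forward pass of the form affine - ReLU - affine. The sketch below only illustrates that idea using the parameter names W1, b1, W2, b2 from the assignment; it is not the reference implementation of loss():
h = np.maximum(0, X.dot(net.params['W1']) + net.params['b1'])  # hidden layer: affine + ReLU
scores_sketch = h.dot(net.params['W2']) + net.params['b2']     # class scores, shape (N, C)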
loss, _ = net.loss(X, y, reg=0.05)
correct_loss = 1.30378789133
# The difference between the two losses should be less than 1e-12
print('Difference between your loss and correct loss:')
print(np.sum(np.abs(loss - correct_loss)))
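With labels, the loss is the softmax cross-entropy averaged over the batch plus an L2 penalty on the weights. A hedged sketch is shown below, continuing from scores_sketch above; the exact regularization convention (e.g. whether a factor of 0.5 is included) depends on your neural_net.py:
shifted = scores_sketch - np.max(scores_sketch, axis=1, keepdims=True)   # shift for numerical stability
probs = np.exp(shifted) / np.sum(np.exp(shifted), axis=1, keepdims=True)
data_loss = -np.mean(np.log(probs[np.arange(X.shape[0]), y]))
reg_loss = 0.05 * (np.sum(net.params['W1'] ** 2) + np.sum(net.params['W2'] ** 2))  # assumed convention
loss_sketch = data_loss + reg_loss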
from cs231n.gradient_check import eval_numerical_gradient
# Use numerical gradients to check the analytic gradients; if the code is correct, the relative error for W1, W2, b1, and b2 should each be less than 1e-8
loss, grads = net.loss(X, y, reg=0.05)
for param_name in grads:
    # f(W) returns the loss for the current parameter values
    f = lambda W: net.loss(X, y, reg=0.05)[0]
    param_grad_num = eval_numerical_gradient(f, net.params[param_name], verbose=False)
    print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))
# W1 max relative error: 3.561318e-09
# W2 max relative error: 3.440708e-09
# b1 max relative error: 2.738423e-09
# b2 max relative error: 4.447677e-11
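eval_numerical_gradient estimates each gradient entry with a finite difference. The helper below is a hypothetical sketch of the centered-difference idea for a single entry; its name and signature are not the library's:
def numeric_grad_entry(f, param, ix, h=1e-5):
    old_value = param[ix]
    param[ix] = old_value + h
    fxph = f(param)              # f(x + h)
    param[ix] = old_value - h
    fxmh = f(param)              # f(x - h)
    param[ix] = old_value        # restore the original value
    return (fxph - fxmh) / (2 * h)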
Verifying the model
Run the code below after completing train() and predict() in neural_net.py.
net = init_toy_model()
# Train the network
stats = net.train(X, y, X, y,
                  learning_rate=1e-1, reg=5e-6,
                  num_iters=100, verbose=False)
# Print the final loss; if the code is correct it should be less than 0.2
print('Final training loss: ', stats['loss_history'][-1])
# Final training loss: 0.017149607938731968
# Plot how the loss changes with the number of iterations
plt.plot(stats['loss_history'])
plt.xlabel('iteration')
plt.ylabel('training loss')
plt.title('Training Loss history')
plt.show()
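train() is expected to run minibatch SGD: repeatedly compute the loss and gradients on a batch, then move each parameter in the negative gradient direction. The helper below is a hypothetical sketch of one such update step, not the assignment's API:
def sgd_step(net, X_batch, y_batch, learning_rate, reg):
    loss, grads = net.loss(X_batch, y_batch, reg=reg)   # forward + backward pass
    for p in net.params:
        net.params[p] -= learning_rate * grads[p]       # vanilla SGD update
    return loss
# e.g. sgd_step(net, X, y, learning_rate=1e-1, reg=5e-6)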
Loading the CIFAR-10 data and preprocessing it
from cs231n.data_utils import load_CIFAR10
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    # Take the 1000 samples after the first 49000 as the validation set
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    # The first 49000 samples form the training set
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    # Take 1000 samples as the test set
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]
    # Subtract the mean image to zero-center the data
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    # Since this is a fully connected network, flatten each image into a row vector
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)
    return X_train, y_train, X_val, y_val, X_test, y_test
# If the data was loaded earlier, delete it first to avoid memory issues
try:
    del X_train, y_train
    del X_test, y_test
    print('Clear previously loaded data.')
except:
    pass
# Call the function above to get the data
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
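As a quick, illustrative sanity check on the preprocessing: each image should be flattened to 32 * 32 * 3 = 3072 values, and the training data should be approximately zero-centered after subtracting the mean image:
assert X_train.shape[1] == 32 * 32 * 3
print('Mean of centered training data:', np.abs(X_train.mean()))  # should be close to 0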