双层神经网络运行结果及loss-iterations图
Epoch 0 / iter 0, loss = 3.4504
Epoch 0 / iter 1, loss = 8.5464
Epoch 0 / iter 2, loss = 23.0399
Epoch 0 / iter 3, loss = 50.6046
Epoch 0 / iter 4, loss = 43.8836
Epoch 1 / iter 0, loss = 5.1435
Epoch 1 / iter 1, loss = 1.7764
Epoch 1 / iter 2, loss = 0.4394
Epoch 1 / iter 3, loss = 0.4025
Epoch 1 / iter 4, loss = 0.0800
Epoch 2 / iter 0, loss = 0.2347
Epoch 2 / iter 1, loss = 0.1533
Epoch 2 / iter 2, loss = 0.1512
Epoch 2 / iter 3, loss = 0.1295
Epoch 2 / iter 4, loss = 0.1740
Epoch 3 / iter 0, loss = 0.1613
Epoch 3 / iter 1, loss = 0.0865
Epoch 3 / iter 2, loss = 0.0842
Epoch 3 / iter 3, loss = 0.1156
Epoch 3 / iter 4, loss = 0.0610
Epoch 4 / iter 0, loss = 0.0674
Epoch 4 / iter 1, loss = 0.0686
Epoch 4 / iter 2, loss = 0.0523
Epoch 4 / iter 3, loss = 0.1257
Epoch 4 / iter 4, loss = 0.1288
Epoch 5 / iter 0, loss = 0.1331
Epoch 5 / iter 1, loss = 0.0630
Epoch 5 / iter 2, loss = 0.0693
Epoch 5 / iter 3, loss = 0.0371
Epoch 5 / iter 4, loss = 0.0428
Epoch 6 / iter 0, loss = 0.0605
Epoch 6 / iter 1, loss = 0.0761
Epoch 6 / iter 2, loss = 0.0863
Epoch 6 / iter 3, loss = 0.0470
Epoch 6 / iter 4, loss = 0.0290
Epoch 7 / iter 0, loss = 0.0617
Epoch 7 / iter 1, loss = 0.0521
Epoch 7 / iter 2, loss = 0.0759
Epoch 7 / iter 3, loss = 0.0528
Epoch 7 / iter 4, loss = 0.0860
Epoch 8 / iter 0, loss = 0.0725
Epoch 8 / iter 1, loss = 0.0507
Epoch 8 / iter 2, loss = 0.0596
Epoch 8 / iter 3, loss = 0.0529
Epoch 8 / iter 4, loss = 0.0666
Epoch 9 / iter 0, loss = 0.0620
Epoch 9 / iter 1, loss = 0.0521
Epoch 9 / iter 2, loss = 0.0409
Epoch 9 / iter 3, loss = 0.0642
Epoch 9 / iter 4, loss = 0.0212
Epoch 10 / iter 0, loss = 0.0442
Epoch 10 / iter 1, loss = 0.0522
Epoch 10 / iter 2, loss = 0.0511
Epoch 10 / iter 3, loss = 0.0643
Epoch 10 / iter 4, loss = 0.0020
Epoch 11 / iter 0, loss = 0.0481
Epoch 11 / iter 1, loss = 0.0455
Epoch 11 / iter 2, loss = 0.0420
Epoch 11 / iter 3, loss = 0.0621
Epoch 11 / iter 4, loss = 0.0025
Epoch 12 / iter 0, loss = 0.0471
Epoch 12 / iter 1, loss = 0.0491
Epoch 12 / iter 2, loss = 0.0488
Epoch 12 / iter 3, loss = 0.0452
Epoch 12 / iter 4, loss = 0.0558
Epoch 13 / iter 0, loss = 0.0337
Epoch 13 / iter 1, loss = 0.0524
Epoch 13 / iter 2, loss = 0.0528
Epoch 13 / iter 3, loss = 0.0454
Epoch 13 / iter 4, loss = 0.0120
Epoch 14 / iter 0, loss = 0.0350
Epoch 14 / iter 1, loss = 0.0406
Epoch 14 / iter 2, loss = 0.0381
Epoch 14 / iter 3, loss = 0.0489
Epoch 14 / iter 4, loss = 0.3947
Epoch 15 / iter 0, loss = 0.0831
Epoch 15 / iter 1, loss = 0.0950
Epoch 15 / iter 2, loss = 0.0452
Epoch 15 / iter 3, loss = 0.0510
Epoch 15 / iter 4, loss = 0.0272
Epoch 16 / iter 0, loss = 0.0431
Epoch 16 / iter 1, loss = 0.0413
Epoch 16 / iter 2, loss = 0.0475
Epoch 16 / iter 3, loss = 0.0248
Epoch 16 / iter 4, loss = 0.0202
Epoch 17 / iter 0, loss = 0.0441
Epoch 17 / iter 1, loss = 0.0401
Epoch 17 / iter 2, loss = 0.0385
Epoch 17 / iter 3, loss = 0.0282
Epoch 17 / iter 4, loss = 0.0066
Epoch 18 / iter 0, loss = 0.0216
Epoch 18 / iter 1, loss = 0.0468
Epoch 18 / iter 2, loss = 0.0322
Epoch 18 / iter 3, loss = 0.0365
Epoch 18 / iter 4, loss = 0.0612
Epoch 19 / iter 0, loss = 0.0333
Epoch 19 / iter 1, loss = 0.0441
Epoch 19 / iter 2, loss = 0.0309
Epoch 19 / iter 3, loss = 0.0279
Epoch 19 / iter 4, loss = 0.0064
Epoch 20 / iter 0, loss = 0.0287
Epoch 20 / iter 1, loss = 0.0371
Epoch 20 / iter 2, loss = 0.0293
Epoch 20 / iter 3, loss = 0.0351
Epoch 20 / iter 4, loss = 0.0271
Epoch 21 / iter 0, loss = 0.0409
Epoch 21 / iter 1, loss = 0.0304
Epoch 21 / iter 2, loss = 0.0317
Epoch 21 / iter 3, loss = 0.0265
Epoch 21 / iter 4, loss = 0.0231
Epoch 22 / iter 0, loss = 0.0417
Epoch 22 / iter 1, loss = 0.0269
Epoch 22 / iter 2, loss = 0.0290
Epoch 22 / iter 3, loss = 0.0297
Epoch 22 / iter 4, loss = 0.0264
Epoch 23 / iter 0, loss = 0.0390
Epoch 23 / iter 1, loss = 0.0260
Epoch 23 / iter 2, loss = 0.0232
Epoch 23 / iter 3, loss = 0.0311
Epoch 23 / iter 4, loss = 0.1139
Epoch 24 / iter 0, loss = 0.0461
Epoch 24 / iter 1, loss = 0.0375
Epoch 24 / iter 2, loss = 0.0267
Epoch 24 / iter 3, loss = 0.0334
Epoch 24 / iter 4, loss = 0.0164
Epoch 25 / iter 0, loss = 0.0249
Epoch 25 / iter 1, loss = 0.0366
Epoch 25 / iter 2, loss = 0.0213
Epoch 25 / iter 3, loss = 0.0292
Epoch 25 / iter 4, loss = 0.0191
Epoch 26 / iter 0, loss = 0.0399
Epoch 26 / iter 1, loss = 0.0151
Epoch 26 / iter 2, loss = 0.0229
Epoch 26 / iter 3, loss = 0.0377
Epoch 26 / iter 4, loss = 0.0089
Epoch 27 / iter 0, loss = 0.0286
Epoch 27 / iter 1, loss = 0.0205
Epoch 27 / iter 2, loss = 0.0193
Epoch 27 / iter 3, loss = 0.0330
Epoch 27 / iter 4, loss = 0.0506
Epoch 28 / iter 0, loss = 0.0338
Epoch 28 / iter 1, loss = 0.0219
Epoch 28 / iter 2, loss = 0.0206
Epoch 28 / iter 3, loss = 0.0303
Epoch 28 / iter 4, loss = 0.0275
Epoch 29 / iter 0, loss = 0.0362
Epoch 29 / iter 1, loss = 0.0211
Epoch 29 / iter 2, loss = 0.0241
Epoch 29 / iter 3, loss = 0.0198
Epoch 29 / iter 4, loss = 0.0087
Epoch 30 / iter 0, loss = 0.0163
Epoch 30 / iter 1, loss = 0.0310
Epoch 30 / iter 2, loss = 0.0247
Epoch 30 / iter 3, loss = 0.0245
Epoch 30 / iter 4, loss = 0.0083
Epoch 31 / iter 0, loss = 0.0249
Epoch 31 / iter 1, loss = 0.0309
Epoch 31 / iter 2, loss = 0.0197
Epoch 31 / iter 3, loss = 0.0211
Epoch 31 / iter 4, loss = 0.0063
Epoch 32 / iter 0, loss = 0.0168
Epoch 32 / iter 1, loss = 0.0293
Epoch 32 / iter 2, loss = 0.0191
Epoch 32 / iter 3, loss = 0.0244
Epoch 32 / iter 4, loss = 0.0080
Epoch 33 / iter 0, loss = 0.0174
Epoch 33 / iter 1, loss = 0.0164
Epoch 33 / iter 2, loss = 0.0277
Epoch 33 / iter 3, loss = 0.0273
Epoch 33 / iter 4, loss = 0.0051
Epoch 34 / iter 0, loss = 0.0146
Epoch 34 / iter 1, loss = 0.0287
Epoch 34 / iter 2, loss = 0.0219
Epoch 34 / iter 3, loss = 0.0220
Epoch 34 / iter 4, loss = 0.0150
Epoch 35 / iter 0, loss = 0.0267
Epoch 35 / iter 1, loss = 0.0149
Epoch 35 / iter 2, loss = 0.0274
Epoch 35 / iter 3, loss = 0.0213
Epoch 35 / iter 4, loss = 0.0090
Epoch 36 / iter 0, loss = 0.0202
Epoch 36 / iter 1, loss = 0.0150
Epoch 36 / iter 2, loss = 0.0251
Epoch 36 / iter 3, loss = 0.0234
Epoch 36 / iter 4, loss = 0.0343
Epoch 37 / iter 0, loss = 0.0212
Epoch 37 / iter 1, loss = 0.0175
Epoch 37 / iter 2, loss = 0.0315
Epoch 37 / iter 3, loss = 0.0161
Epoch 37 / iter 4, loss = 0.0122
Epoch 38 / iter 0, loss = 0.0266
Epoch 38 / iter 1, loss = 0.0173
Epoch 38 / iter 2, loss = 0.0157
Epoch 38 / iter 3, loss = 0.0215
Epoch 38 / iter 4, loss = 0.0024
Epoch 39 / iter 0, loss = 0.0132
Epoch 39 / iter 1, loss = 0.0238
Epoch 39 / iter 2, loss = 0.0250
Epoch 39 / iter 3, loss = 0.0144
Epoch 39 / iter 4, loss = 0.0458
Epoch 40 / iter 0, loss = 0.0304
Epoch 40 / iter 1, loss = 0.0174
Epoch 40 / iter 2, loss = 0.0245
Epoch 40 / iter 3, loss = 0.0121
Epoch 40 / iter 4, loss = 0.0068
Epoch 41 / iter 0, loss = 0.0201
Epoch 41 / iter 1, loss = 0.0157
Epoch 41 / iter 2, loss = 0.0222
Epoch 41 / iter 3, loss = 0.0178
Epoch 41 / iter 4, loss = 0.0090
Epoch 42 / iter 0, loss = 0.0220
Epoch 42 / iter 1, loss = 0.0177
Epoch 42 / iter 2, loss = 0.0190
Epoch 42 / iter 3, loss = 0.0154
Epoch 42 / iter 4, loss = 0.0607
Epoch 43 / iter 0, loss = 0.0169
Epoch 43 / iter 1, loss = 0.0248
Epoch 43 / iter 2, loss = 0.0180
Epoch 43 / iter 3, loss = 0.0139
Epoch 43 / iter 4, loss = 0.0290
Epoch 44 / iter 0, loss = 0.0248
Epoch 44 / iter 1, loss = 0.0134
Epoch 44 / iter 2, loss = 0.0185
Epoch 44 / iter 3, loss = 0.0171
Epoch 44 / iter 4, loss = 0.0187
Epoch 45 / iter 0, loss = 0.0232
Epoch 45 / iter 1, loss = 0.0156
Epoch 45 / iter 2, loss = 0.0143
Epoch 45 / iter 3, loss = 0.0180
Epoch 45 / iter 4, loss = 0.0054
Epoch 46 / iter 0, loss = 0.0176
Epoch 46 / iter 1, loss = 0.0221
Epoch 46 / iter 2, loss = 0.0141
Epoch 46 / iter 3, loss = 0.0138
Epoch 46 / iter 4, loss = 0.0276
Epoch 47 / iter 0, loss = 0.0160
Epoch 47 / iter 1, loss = 0.0162
Epoch 47 / iter 2, loss = 0.0222
Epoch 47 / iter 3, loss = 0.0127
Epoch 47 / iter 4, loss = 0.0273
Epoch 48 / iter 0, loss = 0.0192
Epoch 48 / iter 1, loss = 0.0205
Epoch 48 / iter 2, loss = 0.0134
Epoch 48 / iter 3, loss = 0.0146
Epoch 48 / iter 4, loss = 0.0089
Epoch 49 / iter 0, loss = 0.0177
Epoch 49 / iter 1, loss = 0.0126
Epoch 49 / iter 2, loss = 0.0135
Epoch 49 / iter 3, loss = 0.0223
Epoch 49 / iter 4, loss = 0.0133
源自AI-Studio课程《零基础实践深度学习》——3. 使用Python和NumPy构建神经网络模型
挑战题:用代码实现两层的神经网络的梯度传播,中间层的尺寸为13【房价预测案例】(教案当前的版本为一层的神经网络),如 图1 所示。
图1:两层的神经网络
其中关于梯度的求法,请参考这篇文章中的第六步[1]
下面进入代码部分:
此处省略了数据处理的部分(相信这部分对大家来说不是问题)。所以此处仅展示模型以及训练部分的代码,将以下代码复制到部署好该项目的notebook中,即可运行。
import numpy as np
import matplotlib.pyplot as plt
class Network(object):
    """A two-layer fully-connected network for regression, trained with SGD.

    Architecture: x -> (w1, b1) -> z1 -> (w2, b2) -> z2.
    NOTE(review): there is no activation between the layers, so the model is
    effectively a single linear map -- this matches the original tutorial
    challenge, which only asks for two-layer gradient propagation.
    """

    def __init__(self, input_size, hidden_size):
        # Fixed seed so the random weight init (and hence training) is
        # reproducible.  NOTE: this seeds NumPy's *global* RNG as a side effect.
        np.random.seed(0)
        self.w1 = np.random.randn(input_size, hidden_size)   # (in, hidden)
        self.b1 = np.zeros(hidden_size)                      # (hidden,)
        self.w2 = np.random.randn(hidden_size, 1)            # (hidden, 1)
        self.b2 = 0.                                         # scalar bias

    def forward(self, x):
        """Forward pass: return predictions z2 of shape (N, 1) for x of shape (N, input_size)."""
        z1 = np.dot(x, self.w1) + self.b1
        z2 = np.dot(z1, self.w2) + self.b2
        return z2

    def loss(self, z, y):
        """Mean squared error between predictions z and targets y (both (N, 1))."""
        error = z - y
        num_samples = error.shape[0]
        cost = error * error
        cost = np.sum(cost) / num_samples
        return cost

    def gradient(self, x, y):
        """Return (grad_w1, grad_b1, grad_w2, grad_b2): exact gradients of loss() w.r.t. the parameters.

        Backpropagation for L = mean((z2 - y)^2).  Two fixes vs. the original:
        - grad_w1 is the full (input_size, hidden_size) matrix x.T @ delta1;
          the original collapsed it to an (input_size, 1) column that was then
          broadcast across every hidden unit, which is mathematically wrong.
        - the factor 2 from d/dz (z - y)^2 is included, so the returned values
          are the true gradients of the loss defined above.
        """
        # Recompute the forward pass (z1 is needed for the w2 gradient).
        z1 = np.dot(x, self.w1) + self.b1
        z2 = np.dot(z1, self.w2) + self.b2
        N = x.shape[0]
        # dL/dz2 for L = (1/N) * sum((z2 - y)^2), shape (N, 1).
        delta2 = 2. / N * (z2 - y)
        gradient_w2 = np.dot(z1.T, delta2)          # (hidden, 1)
        gradient_b2 = np.sum(delta2)                # scalar
        # Backpropagate through the second layer: dL/dz1, shape (N, hidden).
        delta1 = np.dot(delta2, self.w2.T)
        gradient_w1 = np.dot(x.T, delta1)           # (input, hidden)
        gradient_b1 = np.sum(delta1, axis=0)        # (hidden,)
        return gradient_w1, gradient_b1, gradient_w2, gradient_b2

    def update(self, gradient_w1, gradient_b1, gradient_w2, gradient_b2, eta=0.01):
        """One plain gradient-descent step with learning rate eta."""
        self.w1 = self.w1 - eta * gradient_w1
        self.b1 = self.b1 - eta * gradient_b1
        self.w2 = self.w2 - eta * gradient_w2
        self.b2 = self.b2 - eta * gradient_b2

    def train(self, training_data, num_epochs, batch_size=10, eta=0.01):
        """Mini-batch SGD over training_data, an (N, input_size + 1) array whose last column is the target.

        Returns the list of per-iteration losses.  NOTE: shuffles
        training_data in place each epoch (mutates the caller's array).
        """
        n = len(training_data)
        losses = []
        for epoch_id in range(num_epochs):
            # Reshuffle so each epoch sees the samples in a new order.
            np.random.shuffle(training_data)
            mini_batches = [training_data[k:k + batch_size] for k in range(0, n, batch_size)]
            for iter_id, mini_batch in enumerate(mini_batches):
                x = mini_batch[:, :-1]
                y = mini_batch[:, -1:]
                a = self.forward(x)
                loss = self.loss(a, y)
                gradient_w1, gradient_b1, gradient_w2, gradient_b2 = self.gradient(x, y)
                self.update(gradient_w1, gradient_b1, gradient_w2, gradient_b2, eta)
                losses.append(loss)
                print('Epoch {:3d} / iter {:3d}, loss = {:.4f}'.
                      format(epoch_id, iter_id, loss))
        return losses
# Load the dataset (load_data comes from the tutorial's preprocessing step,
# omitted from this listing).
train_data, test_data = load_data()

# Build the two-layer network: 13 input features, 13 hidden units.
net = Network(input_size=13, hidden_size=13)

# Run mini-batch SGD and collect the per-iteration losses.
losses = net.train(train_data, num_epochs=50, batch_size=100, eta=0.1)

# Plot the loss curve against the iteration number.
iterations = np.arange(len(losses))
plt.plot(iterations, np.array(losses))
plt.xlabel('Iterations')
plt.ylabel('Loss')
plt.title('Training Loss')
plt.show()
总结
- 此处仅展示可执行的代码,对于其中的原理未作说明;
- 需要详细了解梯度计算规则的朋友,请参阅链式法则求导的相关文章;
- 温馨提示:在做求导的时候一定注意张量的形状。