Code source: https://github.com/1601120453/cs224n-homework/blob/master/assignment1/q2_neural.py
import numpy as np
import random
from q1_softmax import softmax
from q2_sigmoid import sigmoid, sigmoid_grad
from q2_gradcheck import gradcheck_naive
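# Note: the four helpers above come from other files in the assignment.
# Their assumed contracts (based on the standard CS224n assignment 1 stubs;
# the repo's actual implementations may differ slightly):
#   softmax(x)            -- row-wise softmax of a 2-D array
#   sigmoid(x)            -- element-wise logistic function 1 / (1 + e^-x)
#   sigmoid_grad(s)       -- sigmoid derivative in terms of its OUTPUT s,
#                            i.e. s * (1 - s)
#   gradcheck_naive(f, x) -- numerical gradient check of f at x, where
#                            f returns (cost, grad)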
def forward_backward_prop(data, labels, params, dimensions):
"""
Forward and backward propagation for a two-layer sigmoidal network
    Compute the forward propagation and the cross-entropy cost, then
    the backward propagation for the gradients of all parameters.
Arguments:
data -- M x Dx matrix, where each row is a training example.
labels -- M x Dy matrix, where each row is a one-hot vector.
params -- Model parameters, these are unpacked for you.
dimensions -- A tuple of input dimension, number of hidden units
and output dimension
"""
### Unpack network parameters (do not modify)
ofs = 0
Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
ofs += Dx * H
b1 = np.reshape(params[ofs:ofs + H], (1, H))
ofs += H
W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
ofs += H * Dy
b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))
### YOUR CODE HERE: forward propagation
N = data.shape[0]
x1 = np.dot(data, W1) + b1
a1 = sigmoid(x1)
x2 = np.dot(a1, W2) + b2
prob = softmax(x2)
cost = -np.mean(np.sum(np.multiply(np.log(prob), labels), axis=-1))
### END YOUR CODE
### YOUR CODE HERE: backward propagation
dx = (prob - labels)/N
gradb2 = np.sum(dx, axis=0)
gradW2 = np.dot(a1.T, dx)
da1 = np.dot(dx, W2.T)
dx1 = da1 * sigmoid_grad(a1)
gradb1 = np.sum(dx1, axis=0)
gradW1 = np.dot(data.T, dx1)
### END YOUR CODE
### Stack gradients (do not modify)
grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
gradW2.flatten(), gradb2.flatten()))
return cost, grad
def sanity_check():
"""
Set up fake data and parameters for the neural network, and test using
gradcheck.
"""
print "Running sanity check..."
N = 20
dimensions = [10, 5, 10]
data = np.random.randn(N, dimensions[0]) # each row will be a datum
labels = np.zeros((N, dimensions[2]))
    for i in range(N):
        labels[i, random.randint(0, dimensions[2] - 1)] = 1
params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (
dimensions[1] + 1) * dimensions[2], )
gradcheck_naive(lambda params:
forward_backward_prop(data, labels, params, dimensions), params)
def your_sanity_checks():
"""
    Use this space to add any additional sanity checks by running:
python q2_neural.py
This function will not be called by the autograder, nor will
your additional tests be graded.
"""
print "Running your sanity checks..."
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
if __name__ == "__main__":
sanity_check()
# your_sanity_checks()
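For readers who only have this snippet, here is a minimal sketch of the imported helpers (an assumption based on the standard CS224n assignment 1 stubs; the actual files in the repo may differ):

def softmax(x):
    # Row-wise softmax; subtracting the row max keeps np.exp from overflowing.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=-1, keepdims=True)

def sigmoid(x):
    # Element-wise logistic function.
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_grad(s):
    # Sigmoid derivative expressed in terms of its output s = sigmoid(x).
    return s * (1.0 - s)

def gradcheck_naive(f, x):
    # Central-difference gradient check; f must return (cost, grad) at x.
    _, grad = f(x)
    h = 1e-4
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + h
        fxph, _ = f(x)
        x[ix] = old - h
        fxmh, _ = f(x)
        x[ix] = old
        numgrad = (fxph - fxmh) / (2 * h)
        reldiff = abs(numgrad - grad[ix]) / max(1.0, abs(numgrad), abs(grad[ix]))
        assert reldiff <= 1e-5, "Gradient check failed at index %s" % (ix,)
        it.iternext()
    print("Gradient check passed!")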
The main purpose of this post is to explain the backward pass of the scheme above through formulas. Two layout conventions for matrix derivatives are needed first:
Numerator layout: the numerator is a column vector and the denominator a row vector; this is also known as the Jacobian formulation.
Denominator layout: the numerator is a row vector and the denominator a column vector; this is also known as the Hessian formulation (the transpose of the Jacobian).
Note: below, the derivative of J with respect to x follows the numerator layout, while the derivatives of J with respect to W and b follow the denominator layout, so that each gradient has the same shape as the parameter it updates.
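The formula images from the original post are not reproduced here, so the following is a reconstruction of the standard derivation (a sketch, with symbols matched to the code above: z_1 = x1, a_1 = a1, z_2 = x2, \hat{Y} = prob). Forward pass and cost:

$$
\begin{aligned}
z_1 &= X W_1 + b_1, \qquad a_1 = \sigma(z_1),\\
z_2 &= a_1 W_2 + b_2, \qquad \hat{Y} = \operatorname{softmax}(z_2),\\
J &= -\frac{1}{N}\sum_{i=1}^{N}\sum_{k=1}^{D_y} Y_{ik}\,\log \hat{Y}_{ik}.
\end{aligned}
$$

Backward pass, written in the denominator layout so every gradient has the same shape as its parameter:

$$
\begin{aligned}
\delta_2 &= \frac{\partial J}{\partial z_2} = \frac{1}{N}\,(\hat{Y}-Y) &&\in \mathbb{R}^{N\times D_y} &&(\texttt{dx})\\
\frac{\partial J}{\partial W_2} &= a_1^{\top}\,\delta_2 &&\in \mathbb{R}^{H\times D_y} &&(\texttt{gradW2})\\
\frac{\partial J}{\partial b_2} &= \textstyle\sum_{i=1}^{N}(\delta_2)_{i,:} &&\in \mathbb{R}^{1\times D_y} &&(\texttt{gradb2})\\
\delta_1 &= \bigl(\delta_2 W_2^{\top}\bigr)\odot a_1\odot(1-a_1) &&\in \mathbb{R}^{N\times H} &&(\texttt{dx1})\\
\frac{\partial J}{\partial W_1} &= X^{\top}\,\delta_1 &&\in \mathbb{R}^{D_x\times H} &&(\texttt{gradW1})\\
\frac{\partial J}{\partial b_1} &= \textstyle\sum_{i=1}^{N}(\delta_1)_{i,:} &&\in \mathbb{R}^{1\times H} &&(\texttt{gradb1})
\end{aligned}
$$

The first line is the usual softmax-plus-cross-entropy cancellation: differentiating J with respect to the pre-softmax scores z_2 collapses to (\hat{Y} - Y)/N, which is exactly the dx computed in the code; the factor a_1 \odot (1 - a_1) is sigmoid_grad(a1).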