import numpy as np

def sigmoid(x):
    """
    Compute the sigmoid function for the input here.
    """
    ### YOUR CODE HERE
    f = 1 / (1 + np.exp(-x))  # definition of the sigmoid function
    ### END YOUR CODE
    return f

def sigmoid_grad(f):
    """
    Compute the gradient for the sigmoid function here. Note that
    for this implementation, the input f should be the sigmoid
    function value of your original input x.
    """
    ### YOUR CODE HERE
    f = f * (1 - f)  # simplified form of the sigmoid derivative, derived in part (a)
    ### END YOUR CODE
    return f
def test_sigmoid_basic():
    """
    Some simple tests to get you started.
    Warning: these are not exhaustive.
    """
    print("Running basic tests...")
    x = np.array([[1, 2], [-1, -2]])
    f = sigmoid(x)
    g = sigmoid_grad(f)
    print(f)
    assert np.amax(np.abs(f - np.array([[0.73105858, 0.88079708],
                                        [0.26894142, 0.11920292]]))) <= 1e-6
    print(g)
    assert np.amax(np.abs(g - np.array([[0.19661193, 0.10499359],
                                        [0.19661193, 0.10499359]]))) <= 1e-6
    print("You should verify these results!\n")

if __name__ == "__main__":
    test_sigmoid_basic()
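As a reminder of why `sigmoid_grad` needs only the sigmoid value `f` and not the original input `x`, the identity from part (a) is:

$$\sigma'(x) = \frac{e^{-x}}{(1 + e^{-x})^2} = \frac{1}{1 + e^{-x}} \cdot \frac{e^{-x}}{1 + e^{-x}} = \sigma(x)\,\big(1 - \sigma(x)\big)$$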
Gradient check: the numerical gradient is approximated with the centered difference

$$f'(x) = \lim_{h \to 0} \frac{f(x+h) - f(x-h)}{2h}$$
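The centered difference is preferred over the one-sided estimate $\frac{f(x+h)-f(x)}{h}$ because its truncation error is quadratic rather than linear in $h$. Expanding both evaluations in a Taylor series,

$$f(x \pm h) = f(x) \pm h f'(x) + \frac{h^2}{2} f''(x) \pm \frac{h^3}{6} f'''(x) + O(h^4)$$

$$\frac{f(x+h) - f(x-h)}{2h} = f'(x) + \frac{h^2}{6} f'''(x) + O(h^4)$$

so with the $h = 10^{-4}$ used below, the discretization error is on the order of $10^{-8}$.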
import numpy as np
import random

# First implement a gradient checker by filling in the following functions
def gradcheck_naive(f, x):
    """
    Gradient check for a function f
    - f should be a function that takes a single argument and outputs the cost and its gradients
    - x is the point (numpy array) to check the gradient at
    """
    # Save the random state so that every evaluation of f below sees the same randomness
    rndstate = random.getstate()
    random.setstate(rndstate)
    fx, grad = f(x)  # Evaluate function value at original point
    h = 1e-4

    # Iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        ### try modifying x[ix] with h defined above to compute numerical gradients
        ### make sure you call random.setstate(rndstate) before calling f(x) each time, this will make it
        ### possible to test cost functions with built in randomness later
        ### YOUR CODE HERE:
        x[ix] += h
        random.setstate(rndstate)        # replay the same randomness as the original call
        f_1 = f(x)[0]                    # f returns (cost, grad); keep only the cost
        x[ix] -= 2 * h
        random.setstate(rndstate)
        f_2 = f(x)[0]
        x[ix] += h                       # restore x[ix] to its original value
        numgrad = (f_1 - f_2) / (2 * h)  # centered-difference estimate
        numgrad = np.sum(numgrad)        # if the cost comes back as an array, collapse it to a scalar
        ### END YOUR CODE

        # Compare gradients
        # grad[ix] is the analytic gradient at index ix; numgrad is the estimate
        # from the centered-difference formula above
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > 1e-5:
            print("Gradient check failed.")
            print("First gradient error found at index %s" % str(ix))
            print("Your gradient: %f \t Numerical gradient: %f" % (grad[ix], numgrad))
            return

        it.iternext()  # Step to next dimension

    print("Gradient check passed!")
def sanity_check():
    """
    Some basic sanity checks.
    """
    quad = lambda x: (np.sum(x ** 2), x * 2)

    print("Running sanity checks...")
    gradcheck_naive(quad, np.array(123.456))    # scalar test
    gradcheck_naive(quad, np.random.randn(3,))  # 1-D test
    gradcheck_naive(quad, np.random.randn(4, 5))  # 2-D test

if __name__ == "__main__":
    sanity_check()
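To see why the `random.setstate(rndstate)` calls matter, here is a minimal sketch (my own illustration, not part of the assignment; `noisy_quad` is a hypothetical name) of a cost with built-in randomness. Because the checker resets the random state before every call to f, each evaluation draws the same scale factor and the check passes; without the resets, the two cost evaluations would disagree randomly and the numerical gradient would be meaningless.

import random
import numpy as np
from q2_gradcheck import gradcheck_naive

def noisy_quad(x):
    # Quadratic whose scale is drawn from Python's global RNG,
    # mimicking costs with built-in sampling.
    r = 1 + 0.1 * random.random()  # same draw used for cost and gradient
    return r * np.sum(x ** 2), r * 2 * x

gradcheck_naive(noisy_quad, np.random.randn(3,))  # passes thanks to the state resets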
import numpy as np
import random

from q1_softmax import softmax
from q2_sigmoid import sigmoid, sigmoid_grad
from q2_gradcheck import gradcheck_naive

def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    """
    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Forward propagation, following part (c):
    Z1 = np.matmul(data, W1) + b1
    h = sigmoid(Z1)
    Z2 = np.matmul(h, W2) + b2
    y = softmax(Z2)
    cost = np.sum(-labels * np.log(y)) / data.shape[0]

    # Backward propagation, following the derivation in part (c):
    grady = (y - labels) / data.shape[0]  # gradient of the cost w.r.t. Z2
    gradW2 = np.matmul(h.T, grady)
    gradb2 = np.sum(grady, axis=0, keepdims=True)
    gradh = np.matmul(grady, W2.T)        # gradient w.r.t. the hidden activations
    gradh = sigmoid_grad(h) * gradh       # gradient w.r.t. Z1
    gradW1 = np.matmul(data.T, gradh)
    gradb1 = np.sum(gradh, axis=0, keepdims=True)

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
def sanity_check():
    """
    Set up fake data and parameters for the neural network, and test using
    gradcheck.
    """
    print("Running sanity check...")

    N = 20                                    # number of training examples
    dimensions = [10, 5, 10]                  # input, hidden, and output layer sizes
    data = np.random.randn(N, dimensions[0])  # each row will be a datum
    labels = np.zeros((N, dimensions[2]))
    for i in range(N):
        labels[i, random.randint(0, dimensions[2] - 1)] = 1  # one-hot label matrix

    params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (
        dimensions[1] + 1) * dimensions[2], )  # flattened network parameters

    gradcheck_naive(lambda params: forward_backward_prop(data, labels, params,
        dimensions), params)

if __name__ == "__main__":
    sanity_check()
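For reference, the backward pass in forward_backward_prop implements the part (c) derivation. Writing $\hat{y} = \mathrm{softmax}(z_2)$ for the prediction and $y$ for the one-hot label, the cross-entropy gradients are

$$\delta_2 = \frac{\partial CE}{\partial z_2} = \hat{y} - y, \qquad \frac{\partial CE}{\partial W_2} = h^{\top}\delta_2, \qquad \frac{\partial CE}{\partial b_2} = \delta_2$$

$$\delta_1 = \big(\delta_2 W_2^{\top}\big) \odot \sigma'(z_1), \qquad \frac{\partial CE}{\partial W_1} = x^{\top}\delta_1, \qquad \frac{\partial CE}{\partial b_1} = \delta_1$$

averaged over the N rows of the batch, which is why grady carries the factor 1/data.shape[0] and the bias gradients sum over axis 0.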