- Define the input-layer, hidden-layer, and output-layer sizes
import numpy as np

def layer_sizes(X, Y, h=4):
    # X has shape (n_x, m) and Y has shape (n_y, m),
    # i.e. one column per training example
    n_x = X.shape[0]  # input layer size
    n_h = h           # hidden layer size
    n_y = Y.shape[0]  # output layer size
    return (n_x, n_h, n_y)
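As a quick shape check (the arrays here are placeholders, not real data): two input features, 400 examples, and one output unit give (2, 4, 1).

X_demo = np.zeros((2, 400))          # placeholder: 2 features, 400 examples
Y_demo = np.zeros((1, 400))          # placeholder: 1 label per example
print(layer_sizes(X_demo, Y_demo))   # -> (2, 4, 1)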
- Define the activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
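For large negative x, np.exp(-x) overflows; the result still saturates to the correct limit of 0, but NumPy emits runtime warnings. If that matters, a numerically stable variant is a common substitute (a sketch, not part of the original):

def sigmoid_stable(x):
    # hypothetical alternative, not in the original: clip the exponents so
    # np.exp never overflows; each np.where branch is exact where it is used
    x = np.asarray(x, dtype=float)
    pos = 1 / (1 + np.exp(-np.clip(x, 0, None)))   # exact for x >= 0
    ex = np.exp(np.clip(x, None, 0))
    neg = ex / (1 + ex)                            # exact for x < 0
    return np.where(x >= 0, pos, neg)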
- Initialize the parameters
def initialize_parameters(n_x, n_h, n_y):
    # small random weights break the symmetry between hidden units;
    # biases can safely start at zero
    params = {
        'w1': np.random.randn(n_h, n_x) * 0.01,
        'w2': np.random.randn(n_y, n_h) * 0.01,
        'b1': np.zeros((n_h, 1)),
        'b2': np.zeros((n_y, 1))
    }
    return params
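For reproducible experiments you may want to seed NumPy before initializing (the seed value here is arbitrary):

np.random.seed(2)                            # arbitrary seed, reproducibility only
demo = initialize_parameters(2, 4, 1)
print(demo['w1'].shape, demo['b1'].shape)    # (4, 2) (4, 1)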
- Forward propagation
def forward_propagation(X, params):
    w1 = params['w1']
    w2 = params['w2']
    b1 = params['b1']
    b2 = params['b2']
    # hidden layer uses tanh, output layer uses sigmoid
    z1 = w1.dot(X) + b1
    a1 = np.tanh(z1)
    z2 = w2.dot(a1) + b2
    a2 = sigmoid(z2)
    cache = {
        'z1': z1,
        'a1': a1,
        'z2': z2,
        'a2': a2
    }
    return cache
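In matrix form (same notation as the code), the forward pass computes

    z1 = w1·X + b1,   a1 = tanh(z1)
    z2 = w2·a1 + b2,  a2 = sigmoid(z2)

with shapes z1, a1: (n_h, m) and z2, a2: (n_y, m), so a2 holds one prediction per column, i.e. per example.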
- Backward propagation
def backward_propagation(X, Y, cache, params):
    m = X.shape[1]  # number of training examples (columns of X)
    w2 = params['w2']
    a1 = cache['a1']
    a2 = cache['a2']
    # output layer: sigmoid + cross-entropy gives dz2 = a2 - Y
    dz2 = a2 - Y
    dw2 = 1/m * dz2.dot(a1.T)
    db2 = 1/m * np.sum(dz2, axis=1, keepdims=True)
    # hidden layer: tanh'(z1) = 1 - a1**2
    dz1 = w2.T.dot(dz2) * (1 - a1**2)
    dw1 = 1/m * dz1.dot(X.T)
    db1 = 1/m * np.sum(dz1, axis=1, keepdims=True)
    grads = {
        'dw1': dw1,
        'dw2': dw2,
        'db1': db1,
        'db2': db2
    }
    return grads
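Why dz2 = a2 - Y is exact and not an approximation: with the cross-entropy cost defined below and a sigmoid output, sigmoid'(z2) = a2·(1 - a2) cancels against the denominators of the log terms. Likewise, 1 - a1**2 is tanh'(z1). Written out, with ∘ denoting elementwise multiplication:

    dz2 = a2 - Y
    dw2 = (1/m) · dz2 · a1.T        db2 = (1/m) · row-sums of dz2
    dz1 = (w2.T · dz2) ∘ (1 - a1**2)
    dw1 = (1/m) · dz1 · X.T         db1 = (1/m) · row-sums of dz1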
- Gradient update
def update_params(grads, params, learning_rate):
    # one gradient-descent step on every parameter
    w1 = params['w1'] - learning_rate * grads['dw1']
    w2 = params['w2'] - learning_rate * grads['dw2']
    b1 = params['b1'] - learning_rate * grads['db1']
    b2 = params['b2'] - learning_rate * grads['db2']
    params = {
        'w1': w1,
        'w2': w2,
        'b1': b1,
        'b2': b2
    }
    return params
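Each call performs one step of plain gradient descent, θ ← θ − α·dθ for every parameter θ, where α is the learning rate.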
- Compute the cost function
def compute_cost(a2, Y):
    m = Y.shape[1]  # number of examples (columns of Y)
    # binary cross-entropy, averaged over the m examples
    logprobs = Y * np.log(a2) + (1 - Y) * np.log(1 - a2)
    cost = -1/m * np.sum(logprobs)
    cost = np.squeeze(cost)
    return cost
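This is the averaged binary cross-entropy, J = −(1/m) Σ [y·log(a2) + (1−y)·log(1−a2)]; np.squeeze turns the resulting 1×1 array into a plain scalar.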
- Wrap everything into a model
def nn_model(X, Y, iterations=10000, learning_rate=0.01):
    n_x, n_h, n_y = layer_sizes(X, Y)
    params = initialize_parameters(n_x, n_h, n_y)
    for i in range(iterations):
        cache = forward_propagation(X, params)
        grads = backward_propagation(X, Y, cache, params)
        params = update_params(grads, params, learning_rate)
        if i % 100 == 0:
            cost = compute_cost(cache['a2'], Y)
            print('iteration %d, cost: %f' % (i, cost))
    return params
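A minimal end-to-end run on a toy XOR problem, with a small predict helper; both the dataset and the helper are illustrative additions, not part of the original:

def predict(X, params):
    # illustrative helper: threshold the output activations at 0.5
    a2 = forward_propagation(X, params)['a2']
    return (a2 > 0.5).astype(int)

np.random.seed(1)                            # arbitrary seed
X_train = np.array([[0, 0, 1, 1],
                    [0, 1, 0, 1]])           # toy inputs, shape (2, 4)
Y_train = np.array([[0, 1, 1, 0]])           # XOR labels, shape (1, 4)
trained = nn_model(X_train, Y_train, iterations=10000, learning_rate=0.5)
print(predict(X_train, trained))             # typically recovers [[0 1 1 0]]

With only four examples the network memorizes the training set, which is the point here: XOR is not linearly separable, so a correct prediction confirms that the hidden tanh layer and backpropagation are working together.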