# Single hidden layer NN(单隐层神经网络)---deeplearning.ai---笔记及Python源码（14）

##### 二、基本理论

###### （2）模型训练

$$J({w^{[1]}},{b^{[1]}},{w^{[2]}},{b^{[2]}}) = \frac{1}{m}\sum\limits_{i = 1}^m {L({a^{[2](i)}},{y^{(i)}})}$$

L为损失函数，具体定义参看笔记13.

$$\begin{array}{l}\frac{{\partial J({w^{[1]}},{b^{[1]}},{w^{[2]}},{b^{[2]}})}}{{\partial {w^{[2]}}}} = d{w^{[2]}} = {a^{[1]}}({a^{[2]}} - y)\\\frac{{\partial J({w^{[1]}},{b^{[1]}},{w^{[2]}},{b^{[2]}})}}{{\partial {b^{[2]}}}} = d{b^{[2]}} = ({a^{[2]}} - y)\\\frac{{\partial J({w^{[1]}},{b^{[1]}},{w^{[2]}},{b^{[2]}})}}{{\partial {w^{[1]}}}} = d{w^{[1]}} = x({w^{[2]}}({a^{[2]}} - y)*{a^{[1]}}(1 - {a^{[1]}}))\\\frac{{\partial J({w^{[1]}},{b^{[1]}},{w^{[2]}},{b^{[2]}})}}{{\partial {b^{[1]}}}} = d{b^{[1]}} = {w^{[2]}}({a^{[2]}} - y)*{a^{[1]}}(1 - {a^{[1]}})\end{array}$$

$$\begin{array}{l}{w^{[1]}} = {w^{[1]}} - \alpha d{w^{[1]}}\\{w^{[2]}} = {w^{[2]}} - \alpha d{w^{[2]}}\\{b^{[1]}} = {b^{[1]}} - \alpha d{b^{[1]}}\\{b^{[2]}} = {b^{[2]}} - \alpha d{b^{[2]}}\end{array}$$

##### 三、向量化逻辑回归及Python源代码实例

$$X = {\left( {\begin{array}{*{20}{c}} {x_1^{(1)}}& \cdots &{x_1^{(m)}}\\ \vdots & \vdots & \vdots \\ {x_{n\_x}^{(1)}}& \cdots &{x_{n\_x}^{(m)}} \end{array}} \right)_{n\_x \times m}}$$

$$Y = {\left( \begin{array}{l} {y_1}\\ {\rm{ }} \vdots \\ {y_m} \end{array} \right)_{m \times 1}}$$

$${W^{[1]}} = {\left( {\begin{array}{*{20}{c}}{w_{1,1}^{[1]}}& \cdots &{w_{1,{n^{[1]}}}^{[1]}}\\ \vdots & \ddots & \vdots \\{w_{n\_x,1}^{[1]}}& \cdots &{w_{n\_x,{n^{[1]}}}^{[1]}}\end{array}} \right)_{n\_x \times {n^{[1]}}}}$$

$${b^{[1]}} = {\left( \begin{array}{l}{b_1}{\rm{ }}\\{\rm{ }} \vdots {\rm{ }}\\{b_{{n^{[1]}}}}\end{array} \right)_{{n^{[1]}} \times 1}}$$

$${w^{[2]}} = {\left( \begin{array}{l}{w_1}\\ \vdots \\{w_{{n^{[1]}}}}\end{array} \right)_{{n^{[1]}} \times 1}}$$

python代码：

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 16 16:02:38 2018

@author: sysu-hgavin

Single-hidden-layer neural network with sigmoid activations in both
layers, trained by batch gradient descent on a tiny toy data set, then
used to score one new example.
"""
import numpy as np

m = 10      # number of training examples
n_x = 2     # input feature dimension
n_1 = 4     # number of units in the (single) hidden layer

# Training examples, given row-wise then transposed to shape (n_x, m)
# so that each column is one example.
X = np.array([[1, 1], [1.5, 1.5], [2, 2], [2.5, 2.5], [2.75, 2.75],
              [3.15, 3.15], [3.5, 3.5], [3.75, 3.75], [4, 4], [4.5, 4.5]])
X = X.T  # transposition -> (n_x, m)
# Labels, shape (m, 1): first five examples are class 0, last five class 1.
Y = np.array([[0], [0], [0], [0], [0], [1], [1], [1], [1], [1]])

# Initialization: small random weights break the symmetry between hidden
# units; biases may safely start at zero.
alpha = 0.1                       # learning rate
W1 = 0.01 * np.random.rand(n_x, n_1)   # (n_x, n_1)
W2 = 0.01 * np.random.rand(n_1, 1)     # (n_1, 1)
b1 = np.zeros([n_1, 1])                # (n_1, 1)
b2 = 0                                 # scalar output bias

j = 0  # cross-entropy cost of the most recent epoch
# Renamed from `iter` — that name shadows the Python builtin.
for epoch in range(50):
    # --- forward propagation (sigmoid in both layers) ---
    Z1 = np.dot(W1.T, X) + b1        # (n_1, m)
    A1 = 1 / (1 + np.exp(-Z1))       # (n_1, m)
    Z2 = np.dot(W2.T, A1) + b2       # (1, m)
    A2 = 1 / (1 + np.exp(-Z2))       # (1, m)

    # --- backward propagation ---
    dZ2 = A2.T - Y                               # (m, 1)
    dW2 = 1 / m * np.dot(A1, dZ2)                # (n_1, 1)
    db2 = 1 / m * np.sum(dZ2)                    # scalar
    # np.multiply is element-wise, like .* in MATLAB.
    dZ1 = np.multiply(np.dot(dZ2, W2.T), np.multiply(A1, 1 - A1).T)  # (m, n_1)
    dW1 = 1 / m * np.dot(X, dZ1)                 # (n_x, n_1)
    db1 = 1 / m * np.sum(dZ1.T, axis=1, keepdims=True)               # (n_1, 1)

    # Cross-entropy cost, tracked so convergence is visible in the output.
    j = -1 / m * np.sum(np.multiply(Y.T, np.log(A2))
                        + np.multiply((1 - Y).T, np.log(1 - A2)))

    # --- gradient-descent parameter update ---
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    print(j)

print('\n')

# Predict on a single new example, shape (n_x, 1): same forward pass.
xp = np.array([[4], [3.5]])
z1p = np.dot(W1.T, xp) + b1
a1p = 1 / (1 + np.exp(-z1p))
z2p = np.dot(W2.T, a1p) + b2
a2p = 1 / (1 + np.exp(-z2p))
print(a2p)