吴恩达深度学习L1W3
知识点:1层隐藏层的神经网络
1 安装包
略
我直接将函数复制过来
# testCases模块!!!!!!!!!!!!!!!
import numpy as np
def layer_sizes_test_case():
    """Return fixed random inputs for exercising ``layer_sizes``.

    The global NumPy RNG is re-seeded with 1 on every call, so the same
    arrays are produced each time (and the global RNG state is reset as a
    side effect).

    Returns:
        X_assess: standard-normal array of shape (5, 3).
        Y_assess: standard-normal array of shape (2, 3).
    """
    np.random.seed(1)
    X_assess = np.random.randn(5, 3)
    Y_assess = np.random.randn(2, 3)
    return X_assess, Y_assess
def initialize_parameters_test_case():
    """Return fixed layer sizes ``(n_x, n_h, n_y) == (2, 4, 1)`` for tests."""
    n_x, n_h, n_y = 2, 4, 1
    return n_x, n_h, n_y
def forward_propagation_test_case():
    """Return a fixed input batch and parameter set for forward-pass tests.

    The global NumPy RNG is re-seeded with 1, so ``X_assess`` is identical on
    every call.

    NOTE: the weight keys in the returned dict are upper-case (``'W1'``,
    ``'W2'``); the bias keys are ``'b1'`` and ``'b2'``.

    Returns:
        X_assess: standard-normal array of shape (2, 3).
        parameters: dict with ``'W1'`` (4, 2), ``'b1'`` (4, 1),
            ``'W2'`` (1, 4) and ``'b2'`` (1, 1).
    """
    np.random.seed(1)
    X_assess = np.random.randn(2, 3)

    parameters = {
        'W1': np.array([[-0.00416758, -0.00056267],
                        [-0.02136196, 0.01640271],
                        [-0.01793436, -0.00841747],
                        [0.00502881, -0.01245288]]),
        'W2': np.array([[-0.01057952, -0.00909008, 0.00551454, 0.02292208]]),
        'b1': np.array([[0.],
                        [0.],
                        [0.],
                        [0.]]),
        'b2': np.array([[0.]]),
    }
    return X_assess, parameters
def compute_cost_test_case():
    """Return fixed activations, targets and parameters for cost tests.

    The global NumPy RNG is re-seeded with 1, so ``Y_assess`` is identical on
    every call. Note that ``Y_assess`` is drawn from a standard normal, i.e.
    it is not restricted to 0/1 labels.

    NOTE: the weight keys in the returned dict are upper-case (``'W1'``,
    ``'W2'``).

    Returns:
        a2: array of shape (1, 3) with output-layer activations.
        Y_assess: standard-normal array of shape (1, 3).
        parameters: dict with ``'W1'`` (4, 2), ``'b1'`` (4, 1),
            ``'W2'`` (1, 4) and ``'b2'`` (1, 1).
    """
    np.random.seed(1)
    Y_assess = np.random.randn(1, 3)

    parameters = {
        'W1': np.array([[-0.00416758, -0.00056267],
                        [-0.02136196, 0.01640271],
                        [-0.01793436, -0.00841747],
                        [0.00502881, -0.01245288]]),
        'W2': np.array([[-0.01057952, -0.00909008, 0.00551454, 0.02292208]]),
        'b1': np.array([[0.],
                        [0.],
                        [0.],
                        [0.]]),
        'b2': np.array([[0.]]),
    }

    a2 = np.array([[0.5002307, 0.49985831, 0.50023963]])
    return a2, Y_assess, parameters
def backward_propagation_test_case():
    """Return fixed parameters, forward-pass cache and data for backprop tests.

    The global NumPy RNG is re-seeded with 1; ``X_assess`` is drawn first and
    ``Y_assess`` second, so both are identical on every call.

    NOTE: the weight keys in ``parameters`` are upper-case (``'W1'``,
    ``'W2'``).

    Returns:
        parameters: dict with ``'W1'`` (4, 2), ``'b1'`` (4, 1),
            ``'W2'`` (1, 4) and ``'b2'`` (1, 1).
        cache: dict with ``'Z1'``/``'A1'`` of shape (4, 3) and
            ``'Z2'``/``'A2'`` of shape (1, 3).
        X_assess: standard-normal array of shape (2, 3).
        Y_assess: standard-normal array of shape (1, 3).
    """
    np.random.seed(1)
    X_assess = np.random.randn(2, 3)
    Y_assess = np.random.randn(1, 3)

    parameters = {
        'W1': np.array([[-0.00416758, -0.00056267],
                        [-0.02136196, 0.01640271],
                        [-0.01793436, -0.00841747],
                        [0.00502881, -0.01245288]]),
        'W2': np.array([[-0.01057952, -0.00909008, 0.00551454, 0.02292208]]),
        'b1': np.array([[0.],
                        [0.],
                        [0.],
                        [0.]]),
        'b2': np.array([[0.]]),
    }

    cache = {
        'A1': np.array([[-0.00616578, 0.0020626, 0.00349619],
                        [-0.05225116, 0.02725659, -0.02646251],
                        [-0.02009721, 0.0036869, 0.02883756],
                        [0.02152675, -0.01385234, 0.02599885]]),
        'A2': np.array([[0.5002307, 0.49985831, 0.50023963]]),
        'Z1': np.array([[-0.00616586, 0.0020626, 0.0034962],
                        [-0.05229879, 0.02726335, -0.02646869],
                        [-0.02009991, 0.00368692, 0.02884556],
                        [0.02153007, -0.01385322, 0.02600471]]),
        'Z2': np.array([[0.00092281, -0.00056678, 0.00095853]]),
    }
    return parameters, cache, X_assess, Y_assess
def update_parameters_test_case():
    """Return a fixed parameter set and matching gradients for update tests.

    NOTE: the weight keys are upper-case (``'W1'``, ``'W2'``) and the
    gradient keys follow the same casing (``'dW1'``, ``'dW2'``).

    Returns:
        parameters: dict with ``'W1'`` (4, 2), ``'b1'`` (4, 1),
            ``'W2'`` (1, 4) and ``'b2'`` (1, 1).
        grads: dict with ``'dW1'``, ``'db1'``, ``'dW2'`` and ``'db2'``, each
            shaped like the parameter it belongs to.
    """
    parameters = {
        'W1': np.array([[-0.00615039, 0.0169021],
                        [-0.02311792, 0.03137121],
                        [-0.0169217, -0.01752545],
                        [0.00935436, -0.05018221]]),
        'W2': np.array([[-0.0104319, -0.04019007, 0.01607211, 0.04440255]]),
        'b1': np.array([[-8.97523455e-07],
                        [8.15562092e-06],
                        [6.04810633e-07],
                        [-2.54560700e-06]]),
        'b2': np.array([[9.14954378e-05]]),
    }

    grads = {
        'dW1': np.array([[0.00023322, -0.00205423],
                         [0.00082222, -0.00700776],
                         [-0.00031831, 0.0028636],
                         [-0.00092857, 0.00809933]]),
        'dW2': np.array([[-1.75740039e-05, 3.70231337e-03, -1.25683095e-03,
                          -2.55715317e-03]]),
        'db1': np.array([[1.05570087e-07],
                         [-3.81814487e-06],
                         [-1.90155145e-07],
                         [5.46467802e-07]]),
        'db2': np.array([[-1.08923140e-05]]),
    }
    return parameters, grads
def nn_model_test_case():
    """Return a fixed, tiny random dataset for exercising ``nn_model``.

    The global NumPy RNG is re-seeded with 1; ``X_assess`` is drawn first and
    ``Y_assess`` second. ``Y_assess`` is standard-normal, i.e. it is not
    restricted to 0/1 labels.

    Returns:
        X_assess: array of shape (2, 3).
        Y_assess: array of shape (1, 3).
    """
    np.random.seed(1)
    X_assess = np.random.randn(2, 3)
    Y_assess = np.random.randn(1, 3)
    return X_assess, Y_assess
def predict_test_case():
    """Return a fixed parameter set and input batch for prediction tests.

    The global NumPy RNG is re-seeded with 1, so ``X_assess`` is identical on
    every call.

    NOTE: the weight keys in ``parameters`` are upper-case (``'W1'``,
    ``'W2'``), and the return order is ``(parameters, X_assess)``.

    Returns:
        parameters: dict with ``'W1'`` (4, 2), ``'b1'`` (4, 1),
            ``'W2'`` (1, 4) and ``'b2'`` (1, 1).
        X_assess: standard-normal array of shape (2, 3).
    """
    np.random.seed(1)
    X_assess = np.random.randn(2, 3)

    parameters = {
        'W1': np.array([[-0.00615039, 0.0169021],
                        [-0.02311792, 0.03137121],
                        [-0.0169217, -0.01752545],
                        [0.00935436, -0.05018221]]),
        'W2': np.array([[-0.0104319, -0.04019007, 0.01607211, 0.04440255]]),
        'b1': np.array([[-8.97523455e-07],
                        [8.15562092e-06],
                        [6.04810633e-07],
                        [-2.54560700e-06]]),
        'b2': np.array([[9.14954378e-05]]),
    }
    return parameters, X_assess
# planar_utils模块!!!!!!!!!!!!!!!!!!!!
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model
def plot_decision_boundary(model, X, Y):
    """Draw a model's decision regions together with the data points.

    A dense grid is laid over the bounding box of the data (padded by 1 on
    every side), ``model`` is evaluated at every grid point, and the result
    is drawn as a filled contour plot with the examples scattered on top.
    The figure is only drawn, not shown; call ``plt.show()`` afterwards.

    Arguments:
    model -- callable that receives an array of shape (n_points, 2), one
             grid point per row, and returns n_points values
    X -- data whose row 0 is used as the x1 coordinates and row 1 as the
         x2 coordinates
    Y -- per-example values used as scatter colors; must be reshapeable to
         the shape of ``X[0, :]``
    """
    # Set min and max values and give it some padding
    x_min, x_max = X[0, :].min() - 1, X[0, :].max() + 1
    y_min, y_max = X[1, :].min() - 1, X[1, :].max() + 1
    h = 0.01  # grid spacing

    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Predict the function value for the whole grid
    Z = model(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(X[0, :], X[1, :], c=Y.reshape(X[0, :].shape), cmap=plt.cm.Spectral)
def sigmoid(x):
    """
    Compute the sigmoid of x, element-wise.

    Arguments:
    x -- A scalar or numpy array of any size.

    Return:
    s -- sigmoid(x) = 1 / (1 + exp(-x)), with the same shape as x
    """
    s = 1 / (1 + np.exp(-x))
    return s
def load_planar_dataset():
    """Generate the two-class, flower-shaped planar dataset.

    The global NumPy RNG is re-seeded with 1, so the same 400 points are
    produced on every call. The first 200 examples get label 0 and the last
    200 get label 1.

    Returns:
        X: float array of shape (2, 400); one example per column.
        Y: uint8 array of shape (1, 400) holding the 0/1 labels.
    """
    np.random.seed(1)
    m = 400  # number of examples
    N = int(m / 2)  # number of points per class
    D = 2  # dimensionality
    X = np.zeros((m, D))  # data matrix where each row is a single example
    Y = np.zeros((m, 1), dtype='uint8')  # labels vector (0 for red, 1 for blue)
    a = 4  # maximum radius of the flower

    for j in range(2):
        ix = range(N * j, N * (j + 1))  # rows that belong to class j
        t = np.linspace(j * 3.12, (j + 1) * 3.12, N) + np.random.randn(N) * 0.2  # theta
        r = a * np.sin(4 * t) + np.random.randn(N) * 0.2  # radius
        X[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
        Y[ix] = j

    # Transpose once, after both classes are filled: examples become columns.
    X = X.T
    Y = Y.T

    return X, Y
def load_extra_datasets():
    """Build five additional 200-sample toy datasets.

    Four of them come from scikit-learn's dataset generators (circles, moons,
    blobs, Gaussian quantiles); only ``blobs`` is given a fixed
    ``random_state``. ``no_structure`` is a pair of independent uniform
    random arrays of shape (200, 2).

    Returns:
        Tuple ``(noisy_circles, noisy_moons, blobs, gaussian_quantiles,
        no_structure)``.
    """
    N = 200
    noisy_circles = sklearn.datasets.make_circles(n_samples=N, factor=.5, noise=.3)
    noisy_moons = sklearn.datasets.make_moons(n_samples=N, noise=.2)
    blobs = sklearn.datasets.make_blobs(n_samples=N, random_state=5, n_features=2, centers=6)
    gaussian_quantiles = sklearn.datasets.make_gaussian_quantiles(
        mean=None, cov=0.5, n_samples=N, n_features=2,
        n_classes=2, shuffle=True, random_state=None)
    no_structure = np.random.rand(N, 2), np.random.rand(N, 2)

    return noisy_circles, noisy_moons, blobs, gaussian_quantiles, no_structure
2 数据集
首先,让我们获取处理的数据集。 以下代码会将“flower” 2分类数据集加载到变量 X 和 Y中。
使用matplotlib可视化数据集。 数据看起来像是带有一些红色(标签y = 0)和一些蓝色(y = 1)点的“花”。 我们的目标是建立一个适合该数据的分类模型。
# Load the planar "flower" dataset into X (features) and Y (labels).
X, Y = load_planar_dataset()

# Scatter the examples: row 0 of X against row 1, colored by label.
plt.scatter(
    X[0, :],
    X[1, :],
    c=Y.reshape(X[0, :].shape),
    s=40,
    cmap=plt.cm.Spectral,
)
plt.show()
现在你有:
- 包含特征(x1,x2)的numpy数组(矩阵)X
- 包含标签(红色:0,蓝色:1)的numpy数组(向量)Y。
首先让我们深入地了解一下我们的数据。
**练习:**数据集中有多少个训练示例? 另外,变量“ X”和“ Y”的“shape”是什么?
**提示:**如何获得numpy数组的shape维度? (help)
# Report, in order: the shape of X, the size of its second axis (the number
# of examples), and the shape of Y.
for dims in (X.shape, X.shape[1], Y.shape):
    print(dims)
3 简单Logistic回归
# Logistic-regression baseline.
clf = sklearn.linear_model.LogisticRegression()
# scikit-learn expects one example per row and a 1-D label vector, so X is
# transposed and Y is flattened (passing the column vector Y.T triggers a
# DataConversionWarning).
clf.fit(X.T, Y.ravel())

# Plot the decision boundary learned by the linear model.
plot_decision_boundary(clf.predict, X, Y)
plt.title("Logistic Regression")
plt.show()
**说明:**由于数据集不是线性可分的,因此逻辑回归效果不佳。 让我们试试神经网络是否会做得更好吧!
4 神经网络模型
从上面我们可以得知Logistic回归不适用于“flower数据集”。现在你将训练带有单个隐藏层的神经网络。
4.1 定义神经网络结构
**练习:**定义三个变量:
- n_x:输入层的大小
- n_h:隐藏层的大小(将其设置为4)
- n_y:输出层的大小
**提示:**使用shape来找到n_x和n_y。 另外,将隐藏层大小硬编码为4。
# GRADED FUNCTION: layer_sizes
def layer_sizes(X, Y):
    """
    Derive the layer sizes of the network from the data shapes.

    Arguments:
    X -- input dataset of shape (input size, number of examples)
    Y -- labels of shape (output size, number of examples)

    Returns:
    n_x -- the size of the input layer (number of rows of X)
    n_h -- the size of the hidden layer (hard-coded to 4)
    n_y -- the size of the output layer (number of rows of Y)
    """
    n_x = X.shape[0]  # size of input layer
    n_h = 4  # hidden layer size is fixed for this exercise
    n_y = Y.shape[0]  # size of output layer
    return (n_x, n_h, n_y)
# Exercise layer_sizes on the canned test case and print each layer size.
X_assess, Y_assess = layer_sizes_test_case()
n_x, n_h, n_y = layer_sizes(X_assess, Y_assess)

print("The size of the input layer is: n_x = " + str(n_x))
print("The size of the hidden layer is: n_h = " + str(n_h))
print("The size of the output layer is: n_y = " + str(n_y))

# Expected output:
#   The size of the input layer is: n_x = 5
#   The size of the hidden layer is: n_h = 4
#   The size of the output layer is: n_y = 2
4.2 初始化模型的参数
**练习:**实现函数 initialize_parameters()。
说明:
请确保参数大小正确。 如果需要,也可参考上面的神经网络图。
使用随机值初始化权重矩阵。
- 使用:np.random.randn(a,b) * 0.01 随机初始化维度为(a,b)的矩阵。
将偏差向量初始化为零。
- 使用:np.zeros((a,b)) 初始化维度为(a,b)的全零矩阵。
def initialize_parameters(n_x, n_h, n_y):
    """
    Create the initial weights and biases of the two-layer network.

    Weights are drawn from a standard normal (global NumPy RNG) and scaled by
    0.01; biases start at zero. ``w1`` is drawn before ``w2``.

    Arguments:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    parameters -- dict with
                  "w1": weight matrix of shape (n_h, n_x)
                  "b1": bias vector of shape (n_h, 1)
                  "w2": weight matrix of shape (n_y, n_h)
                  "b2": bias vector of shape (n_y, 1)
    """
    w1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    w2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    # Sanity checks on the shapes.
    assert w1.shape == (n_h, n_x)
    assert b1.shape == (n_h, 1)
    assert w2.shape == (n_y, n_h)
    assert b2.shape == (n_y, 1)

    parameters = {
        "w1": w1,
        "b1": b1,
        "w2": w2,
        "b2": b2,
    }
    return parameters
# Size the network from the flower dataset and show the initial parameters.
n_x, n_h, n_y = layer_sizes(X, Y)
parameters = initialize_parameters(n_x, n_h, n_y)
for key in ("w1", "b1", "w2", "b2"):
    print(key + " = " + str(parameters[key]))
4.3 循环
def sigmoid(X):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-X))`` of X.

    X may be a scalar or a NumPy array of any shape.
    """
    return 1 / (1 + np.exp(-X))
def forward_propagation(X, parameters):
    """
    Run the forward pass: tanh hidden layer followed by a sigmoid output layer.

    Arguments:
    X -- input data of shape (n_x, number of examples)
    parameters -- dict holding "w1", "b1", "w2" and "b2"

    Returns:
    A2 -- sigmoid activations of the output layer, shape (n_y, number of examples)
    cache -- dict with the intermediate values "Z1", "A1", "Z2" and "A2"
    """
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']

    Z1 = w1 @ X + b1
    A1 = np.tanh(Z1)
    Z2 = w2 @ A1 + b2
    A2 = sigmoid(Z2)

    # One row per output unit, one column per example. (Checked against
    # w2's row count rather than a hard-coded 1, so n_y > 1 is accepted.)
    assert A2.shape == (w2.shape[0], X.shape[1])

    cache = {
        "Z1": Z1,
        "A1": A1,
        "Z2": Z2,
        "A2": A2,
    }
    return A2, cache
def compute_cost(A2, Y):
    """
    Compute the cross-entropy cost averaged over the examples.

    cost = -(1/m) * sum(Y * log(A2) + (1 - Y) * log(1 - A2))

    Arguments:
    A2 -- output-layer activations, same shape as Y
    Y -- labels; the number of examples m is taken from Y.shape[1]

    Returns:
    cost -- the cost with all size-1 dimensions squeezed away
    """
    m = Y.shape[1]
    cost = - 1 / m * np.sum(Y * np.log(A2) + (1 - Y) * np.log(1 - A2))
    cost = np.squeeze(cost)
    return cost
# Forward propagation on the full dataset.
A2, cache = forward_propagation(X, parameters)

# Cost of the resulting activations against the labels.
cost = compute_cost(A2, Y)
print(cost)
现在,通过使用在正向传播期间计算的缓存,你可以实现后向传播。
**问题:**实现函数backward_propagation()。
说明:
反向传播通常是深度学习中最难(最数学)的部分。为了帮助你更好地了解,我们提供了反向传播课程的幻灯片。你将要使用此幻灯片右侧的六个方程式以构建向量化实现。
def backward_propagation(parameters, cache, X, Y):
    """
    Compute the gradients of the cost with respect to all parameters.

    Arguments:
    parameters -- dict of parameters; only "w2" is read
    cache -- dict from the forward pass; "A1" and "A2" are read
    X -- input data of shape (n_x, number of examples)
    Y -- labels, same shape as cache["A2"]

    Returns:
    grads -- dict with "dw1", "db1", "dw2" and "db2", each shaped like the
             parameter it belongs to
    """
    A1 = cache['A1']
    A2 = cache['A2']
    w2 = parameters['w2']
    m = X.shape[1]  # number of examples

    # Output layer.
    dz2 = A2 - Y
    dw2 = 1 / m * (dz2 @ A1.T)
    db2 = 1 / m * np.sum(dz2, axis=1, keepdims=True)

    # Hidden layer: (1 - A1^2) is the derivative of tanh, applied
    # element-wise to the back-propagated error.
    dz1 = (w2.T @ dz2) * (1 - np.power(A1, 2))
    dw1 = 1 / m * (dz1 @ X.T)
    db1 = 1 / m * np.sum(dz1, axis=1, keepdims=True)

    grads = {
        "dw1": dw1,
        "db1": db1,
        "dw2": dw2,
        "db2": db2,
    }
    return grads
# Back-propagate through the network once and show every gradient.
grads = backward_propagation(parameters, cache, X, Y)
for key in ("dw1", "db1", "dw2", "db2"):
    print(key + " = " + str(grads[key]))
def update_parameters(parameters, grads, learning_rate):
    """
    Apply one gradient-descent step: ``theta = theta - learning_rate * dtheta``.

    The input dicts are not modified; a new dict is returned.

    Arguments:
    parameters -- dict holding "w1", "b1", "w2" and "b2"
    grads -- dict holding "dw1", "db1", "dw2" and "db2"
    learning_rate -- step size of the update

    Returns:
    param -- dict with the updated "w1", "b1", "w2" and "b2"
    """
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']

    dw1 = grads['dw1']
    db1 = grads['db1']
    dw2 = grads['dw2']
    db2 = grads['db2']

    param = {
        'w1': w1 - learning_rate * dw1,
        'b1': b1 - learning_rate * db1,
        'w2': w2 - learning_rate * dw2,
        'b2': b2 - learning_rate * db2,
    }
    return param
# Take a single gradient-descent step and show the updated parameters.
param = update_parameters(parameters, grads, learning_rate=1.2)
for key in ("w1", "b1", "w2", "b2"):
    print(key + " = " + str(param[key]))
4.4 在nn_model()中集成4.1、4.2和4.3部分中的函数
**问题:**在nn_model()中建立你的神经网络模型。
**说明:**神经网络模型必须以正确的顺序组合先前构建的函数。
def nn_model(X, Y, iterations, learning_rate=1.2, print_cost=False):
    """
    Train the one-hidden-layer network with batch gradient descent.

    The global NumPy RNG is seeded with 3 before the parameters are
    initialized, so training is reproducible.

    Arguments:
    X -- input data of shape (n_x, number of examples)
    Y -- labels of shape (n_y, number of examples)
    iterations -- number of gradient-descent steps to run
    learning_rate -- step size passed to update_parameters (default 1.2)
    print_cost -- if True, print the cost every 1000 iterations

    Returns:
    parameters -- dict with the learned "w1", "b1", "w2" and "b2"; with
                  iterations == 0 these are the initial parameters
    """
    np.random.seed(3)
    n_x, n_h, n_y = layer_sizes(X, Y)
    parameters = initialize_parameters(n_x, n_h, n_y)  # initial parameters

    for i in range(iterations):
        A2, cache = forward_propagation(X, parameters)  # forward pass
        grads = backward_propagation(parameters, cache, X, Y)  # backward pass

        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, compute_cost(A2, Y)))

        # Rebind ``parameters`` to the updated values so the next iteration
        # continues from them. (Storing the result under another name would
        # restart every iteration from the initial weights.)
        parameters = update_parameters(parameters, grads, learning_rate=learning_rate)

    return parameters
# Train on the canned test case and show the learned parameters.
X_assess, Y_assess = nn_model_test_case()
param = nn_model(X_assess, Y_assess, iterations=10000)
for label, key in (("W1", "w1"), ("b1", "b1"), ("W2", "w2"), ("b2", "b2")):
    print(label + " = " + str(param[key]))
4.5 预测
def predict(param, X):
    """
    Predict a 0/1 class for every example in X.

    Arguments:
    param -- dict of network parameters, passed to forward_propagation
    X -- input data of shape (n_x, number of examples)

    Returns:
    predictions -- the output activations rounded to the nearest integer
                   (np.round), same shape as the activations
    """
    A2, cache = forward_propagation(X, param)  # forward pass
    predictions = np.round(A2)  # round each output probability to 0 or 1
    return predictions
# Predict with the parameters returned by nn_model (``param``), not with the
# untrained initial ``parameters``.
predictions = predict(param, X_assess)
print("predictions mean = " + str(np.mean(predictions)))
# Output recorded in the original write-up (may differ after the fix above):
# predictions mean = 0.6666666666666666