Copyright notice: This is the author's original article and may not be reproduced without permission. https://blog.csdn.net/weixin_42432468
Study notes:
1. Watch each week's video lectures once or twice.
2. Take notes.
3. Do each week's programming assignment; this is where most of the value lies. Work through the notebook first, and once you have mastered it, type the code out yourself so you can use it with ease later on.
1. Load Dataset
2. Algorithm implementation
2.1 Initialize parameters
2.2 Forward propagation functions
2.3 Compute cost
2.4 Backward propagation functions
2.5 Update parameters
3. Prediction
# import packages
import numpy as np
import matplotlib.pyplot as plt
# from reg_utils import sigmoid, relu, plot_decision_boundary, initialize_parameters, load_2D_dataset, predict_dec
# from reg_utils import compute_cost, predict, forward_propagation, backward_propagation, update_parameters
from reg_utils import load_2D_dataset
import sklearn
import sklearn.datasets
import scipy.io
from testCases_improve_regulariation import *
%matplotlib inline
plt.rcParams['figure.figsize'] = (7.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
1. Load Dataset
train_X, train_Y, test_X, test_Y = load_2D_dataset()
# Inspect the loaded data: type, shape, and the first example
print ('train_X:\n',type(train_X),train_X.shape,'\n')
print (train_X[:,0])
print ('test_X:\n',type(test_X),test_X.shape,'\n')
print (test_X[:,0])
train_X:
<class 'numpy.ndarray'> (2, 211)
[-0.158986 0.423977]
test_X:
<class 'numpy.ndarray'> (2, 200)
[-0.35306235 -0.67390181]
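To get a feel for the data, here is a minimal sketch that visualizes the 2D training set (assuming, as the printout above shows, that train_X has shape (2, m) and train_Y has shape (1, m)):
# Scatter plot of the training set: x1 vs x2, colored by label
plt.scatter(train_X[0, :], train_X[1, :], c=np.squeeze(train_Y), s=40, cmap=plt.cm.Spectral)
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()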
2. Algorithm implementation
2.1 Initialize parameters
def initialize_parameters(layer_dims, initialization='he'):
    np.random.seed(3)
    L = len(layer_dims)
    pars = {}
    if initialization == 'zeros':
        for l in range(1, L):
            pars['W'+str(l)] = np.zeros((layer_dims[l], layer_dims[l-1]))
            pars['b'+str(l)] = np.zeros((layer_dims[l], 1))
    elif initialization == 'random':
        for l in range(1, L):
            # pars['W'+str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 10
            pars['W'+str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1])
            pars['b'+str(l)] = np.zeros((layer_dims[l], 1))
    elif initialization == 'he':
        for l in range(1, L):
            # pars['W'+str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * np.sqrt(2./layer_dims[l-1])
            pars['W'+str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * np.sqrt(1./layer_dims[l-1])
            pars['b'+str(l)] = np.zeros((layer_dims[l], 1))
    return pars
# test initialize_parameters function
pars_test = initialize_parameters([3,2,1],initialization='he')
print (pars_test)
pars_test = initialize_parameters([3,2,1],initialization='random')
print (pars_test)
{'W1': array([[ 1.03266513, 0.25201908, 0.05571284],
[-1.07588801, -0.16015015, -0.20482019]]), 'b1': array([[0.],
[0.]]), 'W2': array([[-0.05850706, -0.44335643]]), 'b2': array([[0.]])}
{'W1': array([[ 1.78862847, 0.43650985, 0.09649747],
[-1.8634927 , -0.2773882 , -0.35475898]]), 'b1': array([[0.],
[0.]]), 'W2': array([[-0.08274148, -0.62700068]]), 'b2': array([[0.]])}
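As a quick sanity check on the scale of the initialized weights, here is a small sketch (note that standard He initialization scales by sqrt(2/fan_in), while the sqrt(1/fan_in) used in initialize_parameters above is the Xavier-style scale):
# Compare the empirical standard deviation of large weight matrices under the two scalings
np.random.seed(3)
fan_in = 400
W_he = np.random.randn(400, fan_in) * np.sqrt(2. / fan_in)       # standard He scaling
W_xavier = np.random.randn(400, fan_in) * np.sqrt(1. / fan_in)   # scaling used in initialize_parameters above
print('He std:     %.4f (expected %.4f)' % (W_he.std(), np.sqrt(2. / fan_in)))
print('Xavier std: %.4f (expected %.4f)' % (W_xavier.std(), np.sqrt(1. / fan_in)))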
2.2 Forward propagation functions
def linear_forward(A, W, b, keep_prob=1, regularization=None):
    np.random.seed(1)
    D = np.random.rand(A.shape[0], A.shape[1])
    # this code is for dropout (applied to A_prev before the linear step)
    if regularization == 'dropout':
        # print ('D:\n', D)
        # Note: D2 here does not match the D2 in the course notebook. Re-seeding with
        # np.random.seed(1) at the top of this function restarts the random stream for
        # every layer, whereas the course seeds once per forward pass; this is why the
        # final dropout results differ from the course.
        D = np.where(D <= keep_prob, 1, 0)
        A = np.multiply(A, D)
        A = A / keep_prob
    #####################################
    Z = np.dot(W, A) + b
    cache = (A, W, b, D)
    return Z, cache
# Only the first random draw comes out the same; the later draws do not
np.random.seed(1)  # seeding here (once, outside the loop) reproduces the same sequence as the three sequential draws below
for i in range(3):
    # np.random.seed(1)  # seeding here (inside the loop) would restart the stream and give different random data
    D = np.random.rand(2, 3)
    print (D, '\n')
np.random.seed(1)
print ('- '*30)
D = np.random.rand(2,3)
print (D,'\n')
D = np.random.rand(2,3)
print (D,'\n')
D = np.random.rand(2,3)
print (D,'\n')
[[4.17022005e-01 7.20324493e-01 1.14374817e-04]
[3.02332573e-01 1.46755891e-01 9.23385948e-02]]
[[0.18626021 0.34556073 0.39676747]
[0.53881673 0.41919451 0.6852195 ]]
[[0.20445225 0.87811744 0.02738759]
[0.67046751 0.4173048 0.55868983]]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
[[4.17022005e-01 7.20324493e-01 1.14374817e-04]
[3.02332573e-01 1.46755891e-01 9.23385948e-02]]
[[0.18626021 0.34556073 0.39676747]
[0.53881673 0.41919451 0.6852195 ]]
[[0.20445225 0.87811744 0.02738759]
[0.67046751 0.4173048 0.55868983]]
def sigmoid_forward(Z):
    '''
    arguments:
    Z --> input value
    returns:
    A --> sigmoid(Z)
    '''
    A = 1./(1 + np.exp(-Z))
    cache = Z
    return A, cache
def relu_forward(Z):
    '''
    arguments:
    Z --> input value
    returns:
    A --> ReLU(Z)
    '''
    # A = np.maximum(0.01*Z, Z)  # leaky ReLU variant
    A = np.maximum(0, Z)
    cache = Z
    return A, cache
def activation_forward(Z, activation):
    if activation == 'sigmoid':
        A, cache = sigmoid_forward(Z)
    elif activation == 'relu':
        A, cache = relu_forward(Z)
    return A, cache
def linear_activation_forward(A_prev, W, b, activation, keep_prob=1, regularization=None):
    Z, linear_cache = linear_forward(A_prev, W, b, keep_prob=keep_prob, regularization=regularization)
    A, activation_cache = activation_forward(Z, activation)
    cache = (linear_cache, activation_cache)
    return A, cache
def L_model_forward(X, pars, keep_prob=1, regularization=None):
    caches = []
    A = X
    L = len(pars)//2 + 1
    np.random.seed(1)
    # layer 1: relu, no dropout applied to the input layer
    A_prev = A
    A, cache = linear_activation_forward(A_prev, pars['W1'], pars['b1'], activation='relu', keep_prob=1, regularization=None)
    caches.append(cache)
    # A_prev = A
    # A, cache = linear_activation_forward(A_prev, pars['W2'], pars['b2'], activation='relu', keep_prob=keep_prob, regularization=regularization)
    # caches.append(cache)
    # hidden layers 2 .. L-2: relu, with dropout applied to the incoming activations
    for l in range(2, L-1):
        A_prev = A
        A, cache = linear_activation_forward(A_prev, pars['W'+str(l)], pars['b'+str(l)], activation='relu', keep_prob=keep_prob, regularization=regularization)
        caches.append(cache)
    # output layer: sigmoid
    AL, cache = linear_activation_forward(A, pars['W'+str(L-1)], pars['b'+str(L-1)], activation='sigmoid', keep_prob=keep_prob, regularization=regularization)
    caches.append(cache)
    assert(AL.shape == (1, X.shape[1]))
    return AL, caches
X_assess, parameters = forward_propagation_with_dropout_test_case()
A3, cache = L_model_forward(X_assess, parameters, keep_prob = 0.7,regularization='dropout')
print ("A3 = " + str(A3))
A3 = [[0.36974721 0.49683389 0.04565099 0.01446893 0.36974721]]
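For comparison, here is a minimal sketch of the course-style inverted dropout, where the mask is applied to a hidden activation right after it is computed, rather than to A_prev inside linear_forward as above (the names A1, D1 and the values are illustrative only):
np.random.seed(1)
A1 = np.random.randn(2, 5)                     # an illustrative hidden-layer activation
keep_prob = 0.7
D1 = np.random.rand(A1.shape[0], A1.shape[1])  # random mask with the same shape as A1
D1 = (D1 < keep_prob).astype(int)              # keep each unit with probability keep_prob
A1 = A1 * D1                                   # shut down the dropped units
A1 = A1 / keep_prob                            # inverted dropout: rescale so the expected value is unchanged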
2.3 Compute cost
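With L2 regularization, the cost implemented below is the usual cross-entropy cost plus a weight-decay term (here λ is lambd and m the number of examples):

$$J_{regularized} = -\frac{1}{m}\sum_{i=1}^{m}\left(y^{(i)}\log a^{[L](i)} + (1-y^{(i)})\log\left(1-a^{[L](i)}\right)\right) + \frac{\lambda}{2m}\sum_{l}\left\|W^{[l]}\right\|_F^2$$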
def compute_cost(AL, Y, pars, lambd=0, regularization=None):
    assert(AL.shape[1] == Y.shape[1])
    # cost = -np.mean(Y*np.log(AL) + (1-Y)*np.log(1-AL), axis=1, keepdims=True)  # * is element-wise on arrays, matrix multiplication on matrices
    m = Y.shape[1]
    # cost = (1./m) * (-np.dot(Y, np.log(AL).T) - np.dot(1-Y, np.log(1-AL).T))  # the dot product multiplies and sums in one step, then averages
    # cost = np.squeeze(cost)
    # print (AL)
    cost = (1./m) * (-np.multiply(Y, np.log(AL)) - np.multiply(1-Y, np.log(1-AL)))  # element-wise product divided by m, then summed below
    cost = np.nansum(cost)  # np.nansum still returns the sum even if the array contains NaN values
    # this code is for L2 regularization
    if regularization == 'L2':
        l2 = 0
        L = int(len(pars)/2)
        for l in range(1, L+1):
            a = np.sum(np.square(pars['W'+str(l)]))
            l2 += a
        l2 = l2 * lambd / m / 2
        cost = cost + l2
    ##############################
    # three kinds of multiplication: *, np.dot, np.multiply
    return cost
# test compute_cost with regularization function
A3, Y_assess, parameters = compute_cost_with_regularization_test_case()
print("cost = " + str(compute_cost(A3, Y_assess, parameters, lambd = 0.1,regularization='L2')))
cost = 1.786485945159076
2.4 Backward propagation functions
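The only change L2 regularization makes to backpropagation is an extra term in the weight gradients, implemented in linear_backward below:

$$dW^{[l]} = \frac{1}{m}\, dZ^{[l]} A^{[l-1]T} + \frac{\lambda}{m} W^{[l]}$$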
def sigmoid_backward(dA, activation_cache):
    Z = activation_cache
    A = 1./(1 + np.exp(-Z))
    dZ = dA * A * (1-A)
    return dZ
def relu_backward(dA, activation_cache):
    Z = activation_cache
    dZ = np.array(dA, copy=True)
    assert (dZ.shape == Z.shape)
    dZ[Z <= 0] = 0
    return dZ
def activation_backward(dA, activation_cache, activation):
    if activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
    elif activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    return dZ
def linear_backward(dZ, linear_cache, lambd=0, regularization=None, keep_prob=1):
    A_prev, W, b, D = linear_cache
    m = A_prev.shape[1]
    dA_prev = np.dot(W.T, dZ)
    # this code is for dropout: apply the same mask used in the forward pass, then rescale
    if regularization == 'dropout':
        assert (dA_prev.shape == D.shape)
        dA_prev = np.multiply(dA_prev, D)
        dA_prev = dA_prev / keep_prob
    ######################################
    dW = 1./m * np.dot(dZ, A_prev.T)  # forgetting the 1/m factor here gives wrong gradients
    # this code is for L2 regularization
    if regularization == 'L2':
        dW = dW + W * lambd / m
    ######################
    db = np.mean(dZ, axis=1, keepdims=True)  # average over the m examples, one value per unit
    # db = 1./m * np.sum(dZ)  # this earlier version gives a different result: np.sum without axis=1 sums ALL
    #                         # elements into a scalar. The course uses 1./m * np.sum(dZ, axis=1, keepdims=True),
    #                         # which is identical to the np.mean line above.
    return dA_prev, dW, db
def activation_linear_backward(dA, cache, activation, lambd=0, regularization=None, keep_prob=1):
    linear_cache, activation_cache = cache
    dZ = activation_backward(dA, activation_cache, activation)
    dA_prev, dW, db = linear_backward(dZ, linear_cache, lambd=lambd, regularization=regularization, keep_prob=keep_prob)
    return dA_prev, dW, db
def L_model_backward(AL, Y, caches, lambd=0, regularization=None, keep_prob=1):
    Y = Y.reshape(AL.shape)
    dAL = -(np.divide(Y, AL) - np.divide(1-Y, 1-AL))
    grads = {}
    L = len(caches) + 1
    current_cache = caches[L-2]
    grads['dA'+str(L-1)], grads['dW'+str(L-1)], grads['db'+str(L-1)] = activation_linear_backward(dAL, current_cache, activation='sigmoid', lambd=lambd, regularization=regularization, keep_prob=keep_prob)
    for l in reversed(range(L-2)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = activation_linear_backward(grads['dA'+str(l+2)], current_cache, activation='relu', lambd=lambd, regularization=regularization, keep_prob=keep_prob)
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads
2.5 Update parameters
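Parameters are updated by plain gradient descent (α is learning_rate):

$$W^{[l]} := W^{[l]} - \alpha\, dW^{[l]}, \qquad b^{[l]} := b^{[l]} - \alpha\, db^{[l]}$$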
def update_parameters(pars, grads, learning_rate):
    L = len(pars)//2 + 1
    for l in range(1, L):
        pars['W'+str(l)] = pars['W'+str(l)] - learning_rate*grads['dW'+str(l)]
        pars['b'+str(l)] = pars['b'+str(l)] - learning_rate*grads['db'+str(l)]
    return pars
L_layer_model
def L_layer_model(X, Y, layer_dims, learning_rate=0.01, num_iterations=3000, print_cost=False, initialization='he', lambd=0, regularization=None, keep_prob=1):
    '''
    1. Initialize parameters
    2. Loop for num_iterations iterations
    3. Forward propagation
    4. Compute cost
    5. Backward propagation
    6. Update parameters
    7. Return costs and pars
    '''
    # np.random.seed(1)
    # initialize parameters
    pars = initialize_parameters(layer_dims, initialization)
    L = len(layer_dims)
    costs = []
    for i in range(0, num_iterations):
        # forward propagation (keep_prob/regularization must be passed through, otherwise dropout is never applied during training)
        AL, caches = L_model_forward(X, pars, keep_prob=keep_prob, regularization=regularization)
        # compute cost
        cost = compute_cost(AL, Y, pars, lambd=lambd, regularization=regularization)
        if i % 1000 == 0:
            costs.append(cost)
        if i % 10000 == 0 and print_cost:
            print("Cost after iteration %i: %f" % (i, cost))
        # backward propagation
        grads = L_model_backward(AL, Y, caches, lambd=lambd, regularization=regularization, keep_prob=keep_prob)
        # update parameters
        pars = update_parameters(pars, grads, learning_rate)
    plt.figure(figsize=(30, 6.5))
    plt.subplot(1, 2, 1)
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per thousands)')
    plt.title("Learning rate =" + str(learning_rate))
    return costs, pars
layers_dims = [2,20,3,1] # 3-layer model
# no regularization
costs_test,pars_test = L_layer_model(train_X, train_Y, layers_dims,learning_rate = 0.3, num_iterations = 30000, print_cost = True,initialization='he')
# L2 regularization
costs_test,pars_test = L_layer_model(train_X, train_Y, layers_dims,learning_rate = 0.3, num_iterations = 30000, print_cost = True,initialization='he',lambd=0.7,regularization='L2')
# # dropout
# costs_test,pars_test = L_layer_model(train_X, train_Y, layers_dims,learning_rate = 0.3, num_iterations = 30000, print_cost = True,initialization='he',lambd=0,regularization='dropout',keep_prob = 0.86)
Cost after iteration 0: 0.655741
Cost after iteration 10000: 0.163300
Cost after iteration 20000: 0.138516
Cost after iteration 0: 0.697448
Cost after iteration 10000: 0.268492
Cost after iteration 20000: 0.268092
3. Prediction
With learning_rate and num_iterations fixed, training yields the final parameters, which are then used to make predictions.
def predict(X, y, parameters):
    """
    This function is used to predict the results of a L-layer neural network.
    Arguments:
    X -- data set of examples you would like to label
    parameters -- parameters of the trained model
    Returns:
    p -- predictions for the given dataset X
    """
    m = X.shape[1]
    n = len(parameters) // 2  # number of layers in the neural network
    p = np.zeros((1, m))
    # Forward propagation
    probas, caches = L_model_forward(X, parameters)
    # convert probas to 0/1 predictions
    for i in range(0, probas.shape[1]):
        if probas[0, i] > 0.5:
            p[0, i] = 1
        else:
            p[0, i] = 0
    # print results
    # print ("predictions: " + str(p))
    # print ("true labels: " + str(y))
    print("Accuracy: " + str(np.sum((p == y)/m)))
    return p
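The element-wise thresholding loop above can also be written as a single vectorized line (a sketch, equivalent in behavior):
p = (probas > 0.5).astype(float)  # threshold all probabilities at 0.5 at once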
pred_train = predict(train_X, train_Y, pars_test)
pred_test = predict(test_X, test_Y, pars_test)
Accuracy: 0.9383886255924171
Accuracy: 0.9299999999999998
def plot_decision_boundary(model, X, y):
    # Set min and max values and give it some padding
    x_min, x_max = X[0, :].min() - 1, X[0, :].max() + 1
    y_min, y_max = X[1, :].min() - 1, X[1, :].max() + 1
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid
    Z = model(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(X[0, :], X[1, :], c=y, cmap=plt.cm.Spectral)
    plt.show()
def predict_dec(parameters, X):
    """
    Used for plotting decision boundary.
    Arguments:
    parameters -- python dictionary containing your parameters
    X -- input data of size (m, K)
    Returns
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """
    # Predict using forward propagation and a classification threshold of 0.5
    a3, cache = L_model_forward(X, parameters)
    predictions = (a3 > 0.5)
    return predictions
# plt.title("Model with He initialization")
axes = plt.gca()
axes.set_xlim([-0.75,0.40])
axes.set_ylim([-0.75,0.65])
plot_decision_boundary(lambda x: predict_dec(pars_test, x.T), train_X, np.squeeze(train_Y))