Andrew Ng Deep Learning Exercise 1.4_Neural Networks and Deep Learning

Copyright notice: this is the blogger's original article; reposting without permission is prohibited. https://blog.csdn.net/weixin_42432468

Study notes:
1. Watch each week's video lectures once or twice.
2. Take notes.

3. Do the weekly programming assignments; they are where most of the value is. Once you have mastered them, type the code out yourself so it comes naturally when you need it later.


1、Load Dataset

2、Algorithm Implementation

2.1、Initialize Parameters

2.2、Forward Propagation Functions

2.3、Compute the Cost

2.4、Backward Propagation Functions

2.5、Update Parameters

3、Prediction

4、Results Analysis

5、Test with your own image

import time
import numpy as np
import h5py
import matplotlib.pyplot as plt
import scipy
from PIL import Image
from scipy import ndimage
import skimage.transform   # scikit-image is its own package, not part of scipy; used in section 5 below
from dnn_app_utils_yhd import *
# from dnn_app_utils import *

%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2
'''
Reload extensions and modules automatically before executing user code.
%autoreload can take an argument:
no argument: reload all modules now.
0: disable automatic reloading.
1: reload only the modules imported with %aimport before each execution.
2: reload all modules (except those excluded with %aimport) before each execution.
'''
np.random.seed(1)

1、Load Dataset

train_x_orig, train_y, test_x_orig, test_y, classes = load_data()
# Inspect what the loaded dataset actually contains: type, shape, first example
print ('train_x_orig:\n',type(train_x_orig),train_x_orig.shape,'\n',train_x_orig[0,0:2,0:2,0:3])
# 209 training examples; each image is 64x64 in the red, green and blue (RGB) channels

print ('train_y:\n',type(train_y),train_y.shape,'\n',train_y[0,:5])
print ('test_x_orig:\n',type(test_x_orig),test_x_orig.shape,'\n',test_x_orig[0,0:2,0:2,0:3])
print ('test_y:\n',type(test_y),test_y.shape,'\n',test_y[0,:5])
print ('classes:\n',type(classes),classes.shape,'\n',classes)
# Example of a picture
index = 10
plt.imshow(train_x_orig[index])
print ("y = " + str(train_y[0,index]) + ". It's a " + classes[train_y[0,index]].decode("utf-8") +  " picture.")
# Explore your dataset 
m_train = train_x_orig.shape[0]
num_px = train_x_orig.shape[1]
m_test = test_x_orig.shape[0]

print ("Number of training examples: " + str(m_train))
print ("Number of testing examples: " + str(m_test))
print ("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)")
print ("train_x_orig shape: " + str(train_x_orig.shape))
print ("train_y shape: " + str(train_y.shape))
print ("test_x_orig shape: " + str(test_x_orig.shape))
print ("test_y shape: " + str(test_y.shape))
# Reshape the training and test examples 
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T   # The "-1" makes reshape flatten the remaining dimensions
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

# Standardize data to have feature values between 0 and 1.
train_x = train_x_flatten/255.
test_x = test_x_flatten/255.

print ("train_x's shape: " + str(train_x.shape))
print ("test_x's shape: " + str(test_x.shape))

2、Algorithm Implementation

2.1、Initialize Parameters

def initialize_parameters(layer_dims):
    
    np.random.seed(1)
    L = len(layer_dims)
    pars = {}
    for l in range(1,L):
        pars['W'+str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])/ np.sqrt(layer_dims[l-1])
        # Dividing by sqrt(layer_dims[l-1]) (Xavier-style scaling) keeps the scale of Z roughly
        # stable from layer to layer, so the signal neither vanishes nor explodes in a deep network;
        # scaling by 0.01 instead shrinks the activations toward zero at every layer and learning stalls.
#         pars['W'+str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])*0.01
        pars['b'+str(l)] = np.zeros((layer_dims[l],1))
        
    return pars
# test initialize_parameters function
pars_test = initialize_parameters([2,4,1])
print (pars_test)
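Why the /sqrt(n) form works better can be checked numerically. The sketch below (illustrative, not part of the original assignment) pushes a random batch through ten linear+ReLU layers of width 100 and prints how the activation scale evolves under each initialization.

# Numeric comparison of the two initialization scales (illustrative)
np.random.seed(1)
n, depth = 100, 10
A_sqrt = np.random.randn(n, 1000)
A_small = A_sqrt.copy()
for _ in range(depth):
    A_sqrt = np.maximum(0, np.dot(np.random.randn(n, n) / np.sqrt(n), A_sqrt))   # /sqrt(n) scaling
    A_small = np.maximum(0, np.dot(np.random.randn(n, n) * 0.01, A_small))       # *0.01 scaling
print('std with /sqrt(n):', A_sqrt.std())   # decays only gently with depth
print('std with *0.01  :', A_small.std())   # shrinks ~10x per layer: the signal collapses and gradients vanish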

2.2、Forward Propagation Functions

def linear_forward(A,W,b):
    
    Z = np.dot(W,A) + b
    cache = (A,W,b)
    return Z,cache
# test linear_forward function
W_test = np.random.randn(4,2)*0.01
prev_A_test = np.random.randint(2,8,6).reshape(2,3)
b_test = np.zeros((4,1))   # the bias is one column vector per layer, shape (n_l, 1)
Z_test,cache_linear_test = linear_forward(prev_A_test,W_test,b_test)
print (Z_test,'\n',cache_linear_test)
def sigmoid_forward(Z):
    '''
    arguments:
    Z --> linear output, any shape
    
    returns:
    A --> sigmoid(Z)
    cache --> Z, stored for the backward pass
    '''
    A = 1./(1+np.exp(-Z))
    cache = Z
    
    return A,cache
def relu_forward(Z):
    '''
    arguments:
    Z --> linear output, any shape
    
    returns:
    A --> ReLU(Z)
    cache --> Z, stored for the backward pass
    '''
#     A = np.maximum(0.01*Z,Z)   # leaky ReLU variant (unused)
    A = np.maximum(0,Z)
    cache = Z
    
    return A,cache
def activation_forward(Z,activation):
    
    if activation == 'sigmoid':
        A,cache = sigmoid_forward(Z)
    elif activation == 'relu':
        A,cache = relu_forward(Z)
    
    return A,cache
# test activation_forward function
A_test,cache_activation_test = activation_forward(Z_test,activation='sigmoid')
print(A_test)
A_test,cache_activation_test = activation_forward(Z_test,activation='relu')
print(A_test)
def linear_activation_forward(A_prev,W,b,activation):
    
    Z,linear_cache = linear_forward(A_prev,W,b)
    A,activation_cache =  activation_forward(Z,activation)
    cache = (linear_cache,activation_cache)
    
    return A,cache
# test linear_activation_forward function
A_test,cache_test = linear_activation_forward(prev_A_test,W_test,b_test,activation='sigmoid')
print (A_test,'\n',cache_test)
def L_model_forward(X,pars):
    caches = []
    A = X
    L = len(pars)//2 + 1   # pars holds one W and one b per layer, so len(pars)//2 layers; L matches len(layer_dims)
    
    for l in range(1,L-1):
        A_prev = A
        A,cache = linear_activation_forward(A_prev,pars['W'+str(l)],pars['b'+str(l)],activation='relu')
        caches.append(cache)

    AL,cache = linear_activation_forward(A,pars['W'+str(L-1)],pars['b'+str(L-1)],activation='sigmoid')
    caches.append(cache)
    
    assert(AL.shape == (1,X.shape[1]))
    
    return AL,caches
# test L_model_forward function
X_test = np.random.randint(3,9,12).reshape(2,6)
AL_test,caches_test = L_model_forward(X_test,pars_test)
print (AL_test,'\n',caches_test[1][0][0])
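The nested structure of caches is easiest to see by tracing shapes. An illustrative check on the small test network (each cache is (linear_cache, activation_cache) with linear_cache = (A_prev, W, b) and activation_cache = Z):

# Trace array shapes through the forward pass of the [2,4,1] test net (illustrative)
for l, (linear_cache_l, Z_l) in enumerate(caches_test, 1):
    A_prev_l, W_l, b_l = linear_cache_l
    print('layer %d: A_prev %s, W %s, b %s, Z %s' % (l, A_prev_l.shape, W_l.shape, b_l.shape, Z_l.shape))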

2.3、Compute the Cost

def compute_cost(AL,Y):
    assert(AL.shape[1] == Y.shape[1])
    cost = -np.mean(Y*np.log(AL)+(1-Y)*np.log(1-AL),axis=1,keepdims=True)
#     m = Y.shape[1]
#     cost = (1./m) * (-np.dot(Y,np.log(AL).T) - np.dot(1-Y, np.log(1-AL).T))
    cost = np.squeeze(cost)
    return cost
# Test compute_cost function
Y_test = np.array([0,1,0,1,0,0]).reshape(1,6)
cost_test = compute_cost(AL_test,Y_test)
print (cost_test)
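A quick hand check of the cross-entropy (illustrative, not part of the assignment): for AL = [0.9, 0.2] and Y = [1, 0], the cost is -(log 0.9 + log 0.8)/2 ≈ 0.1643.

AL_chk = np.array([[0.9, 0.2]])
Y_chk = np.array([[1, 0]])
print(compute_cost(AL_chk, Y_chk))             # ≈ 0.16425
print(-(np.log(0.9) + np.log(1 - 0.2)) / 2)    # same value, computed by hand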

2.4、Backward Propagation Functions

def sigmoid_backward(dA,activation_cache):
    
    Z = activation_cache
#     A = 1./(1+1/np.exp(-Z))   # incorrect version of the formula, kept for reference
    A = 1./(1 + np.exp(-Z))
    dZ = dA*A*(1-A)
    
    return dZ
def relu_backward(dA,activation_cache):
    
    Z = activation_cache
    dZ = np.array(dA,copy=True)
    assert (dZ.shape == Z.shape)
    dZ[Z <= 0] = 0
    
    return dZ
def activation_backward(dA,activation_cache,activation):
    
    if activation == 'sigmoid':
        dZ = sigmoid_backward(dA,activation_cache)
    elif activation == 'relu':
        dZ = relu_backward(dA,activation_cache)
        
    return dZ
    
# test activation_backward function
dAL_test = -(np.divide(Y_test,AL_test) - np.divide(1-Y_test,1-AL_test))
activation_cache_test = caches_test[1][1]
dZ_test = activation_backward(dAL_test,activation_cache_test,activation='sigmoid')
print (dZ_test)
# dZ_test = activation_backward(dAL_test,activation_cache_test,activation='relu')
# print (dZ_test)
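A finite-difference sanity check on sigmoid_backward (illustrative, not part of the assignment): with dA = 1, dZ should equal the numerical derivative of the sigmoid at Z.

Z_chk = np.array([[0.5, -1.2, 3.0]])
eps = 1e-6
numeric = (sigmoid_forward(Z_chk + eps)[0] - sigmoid_forward(Z_chk - eps)[0]) / (2*eps)
analytic = sigmoid_backward(np.ones_like(Z_chk), Z_chk)
print(np.max(np.abs(numeric - analytic)))   # should be on the order of 1e-10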
def linear_backward(dZ,linear_cache):
    
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]
    dA_prev = np.dot(W.T,dZ)
    dW = 1./m*np.dot(dZ,A_prev.T)       # the 1/m factor is required; omitting it gave wrong results earlier
    db = 1./m*np.sum(dZ,axis=1,keepdims=True)   # equivalent to np.mean(dZ,axis=1,keepdims=True);
    # np.sum(dZ) with no axis also sums across the layer's units, yielding a scalar, which only
    # matches the correct (n_l, 1) gradient when the layer has a single unit (e.g. the output layer)
    
    return dA_prev,dW,db
# test linear_backward function
linear_cache_test = caches_test[1][0]
dA_prev_test,dW_test,db_test = linear_backward(dZ_test,linear_cache_test)
print ('dA_prev_test:\n',dA_prev_test,'\n dW_test:',dW_test,'\n db_test:',db_test)
def activation_linear_backward(dA,cache,activation):
    
    linear_cache,activation_cache = cache
    
    dZ = activation_backward(dA,activation_cache,activation)
    dA_prev,dW,db = linear_backward(dZ,linear_cache)

    return dA_prev,dW,db
# test activation_linear_backward function
cache_test = caches_test[1]
dA_prev_test,dW_test,db_test = activation_linear_backward(dAL_test,cache_test,activation='sigmoid')
print ('dA_prev_test:\n',dA_prev_test,'\n dW_test:',dW_test,'\n db_test:',db_test)
def L_model_backward(AL,Y,caches):
    
    Y = Y.reshape(AL.shape)
    dAL = -(np.divide(Y,AL) - np.divide(1-Y,1-AL))
    grads = {}
    L = len(caches) + 1
    current_cache = caches[L-2]
    grads['dA'+str(L-1)],grads['dW'+str(L-1)],grads['db'+str(L-1)] = activation_linear_backward(dAL,current_cache,activation='sigmoid')
    for l in reversed(range(L-2)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = activation_linear_backward(grads['dA'+str(l+2)],current_cache,activation='relu')
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    
    return grads
# test L_model_backward function
grads_test = L_model_backward(AL_test,Y_test,caches_test)
print (grads_test)
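An end-to-end gradient check (a minimal sketch, not part of the assignment): numerically differentiate the cost with respect to one entry of W1 and compare it with the analytic gradient just computed by L_model_backward.

eps = 1e-7
pars_plus = {k: v.copy() for k, v in pars_test.items()}
pars_plus['W1'][0,0] += eps
pars_minus = {k: v.copy() for k, v in pars_test.items()}
pars_minus['W1'][0,0] -= eps
cost_plus = compute_cost(L_model_forward(X_test, pars_plus)[0], Y_test)
cost_minus = compute_cost(L_model_forward(X_test, pars_minus)[0], Y_test)
print((cost_plus - cost_minus) / (2*eps), grads_test['dW1'][0,0])   # the two values should agree closely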

2.5、Update Parameters

def update_parameters(pars,grads,learning_rate):
    
    L = len(pars)//2 + 1
    for l in range(1,L):
        pars['W'+str(l)] = pars['W'+str(l)] - learning_rate*grads['dW'+str(l)]
        pars['b'+str(l)] = pars['b'+str(l)] - learning_rate*grads['db'+str(l)]
    
    return pars
# test update_parameters function
pars_test = update_parameters(pars_test,grads_test,0.1)
print(pars_test)


def L_layer_model(X,Y,layer_dims,learning_rate = 0.0075,num_iterations = 3000,print_cost=False):
    
    '''
    1. Initialize the parameters
    2. Loop over num_iterations:
        3. forward propagation
        4. compute the cost
        5. backward propagation
        6. update the parameters
    7. Return costs and pars
    '''
    np.random.seed(1)
    
    # initialize the parameters
    pars = initialize_parameters(layer_dims)

    costs = []
    for i in range(num_iterations):
        
        # forward propagation
        AL,caches = L_model_forward(X,pars)

        # compute the cost
        cost = compute_cost(AL,Y) 

        if i%100 ==0 :
            costs.append(cost)
        if i%100 ==0 and print_cost:
            print("Cost after iteration %i: %f" %(i, cost))

        # backward propagation
        grads = L_model_backward(AL,Y,caches)
    
        # update the parameters
        pars = update_parameters(pars,grads,learning_rate)
    
    return costs,pars
# test L_layer_model function
layer_dims_test = [2,4,1]
learning_rate_test = 0.1
num_iterations_test = 1000
costs_test,pars_test = L_layer_model(X_test,Y_test,layer_dims_test,learning_rate_test,num_iterations_test,print_cost=False)
print (pars_test)
plt.figure(figsize=(30,6.5))
plt.subplot(1,2,1)
plt.plot(costs_test);
layers_dims = [12288, 20, 7, 5, 1] #  5-layer model
costs_test,pars_test = L_layer_model(train_x, train_y, layers_dims, num_iterations = 2500, print_cost = True)
# costs_test,pars_test = L_layer_model(train_set_x,train_set_y,layer_dims_test,learning_rate = 0.0075,num_iterations =2500,print_cost=True)
plt.figure(figsize = (30,6.5))
plt.subplot(1,2,1)
plt.plot(np.squeeze(costs_test))
plt.ylabel('cost')
plt.xlabel('iterations (per hundreds)')
# plt.title('learning_rate:'+str(learning_rate))
'''
Three errors made my results disagree with the reference implementation:
1. A = 1./(1+1/np.exp(-Z))   # an incorrect rendering of the sigmoid formula
2. dW = np.dot(dZ,A_prev.T) without the 1/m factor
3. db = np.mean(dZ,axis=1,keepdims=True) versus db = 1./m * np.sum(dZ).
These two differ because np.sum(dZ) with no axis argument also sums across the layer's units and
returns a scalar, whereas b is a column vector with one entry per unit. The scalar form only
coincides with the correct gradient for a single-unit layer; the official assignment uses
db = 1./m * np.sum(dZ, axis=1, keepdims=True), which is identical to the np.mean version.
'''
'''
pars['W'+str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])/ np.sqrt(layer_dims[l-1])
# Why does this form of initialization work better here?
# See the numeric check after initialize_parameters: /sqrt(n) keeps the activation scale roughly
# stable across layers, while *0.01 shrinks the signal toward zero layer by layer.
#pars['W'+str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])*0.01
'''
np.random.seed(1)
dz = np.random.randint(2,8,9).reshape(3,3)
print (dz)
db = np.mean(dz,axis=1,keepdims=True)
print (db)     # (3,1) column vector: one entry per unit
db = np.sum(dz,axis=1,keepdims=True)/dz.shape[1]
print (db)     # identical to the np.mean version
db = np.sum(dz)/dz.shape[1]
print (db)     # scalar: the unit dimension has been summed away as well

3、Prediction

With learning_rate and num_iterations fixed, training yields the final parameters; prediction is then made with those parameters.

def predict(X, y, parameters):
    """
    This function is used to predict the results of a  L-layer neural network.
    
    Arguments:
    X -- data set of examples you would like to label
    parameters -- parameters of the trained model
    
    Returns:
    p -- predictions for the given dataset X
    """
    
    m = X.shape[1]
    n = len(parameters) // 2 # number of layers in the neural network
    p = np.zeros((1,m))
    
    # Forward propagation
    probas, caches = L_model_forward(X, parameters)

    
    # convert probas to 0/1 predictions
    for i in range(0, probas.shape[1]):
        if probas[0,i] > 0.5:
            p[0,i] = 1
        else:
            p[0,i] = 0
    
    #print results
    #print ("predictions: " + str(p))
    #print ("true labels: " + str(y))
    print("Accuracy: "  + str(np.sum((p == y)/m)))
        
    return p
pred_train = predict(train_x, train_y, pars_test)
pred_test = predict(test_x, test_y, pars_test)
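The elementwise 0/1 loop inside predict can also be written as a vectorized threshold. A minimal equivalent sketch on the test set:

probas_chk, _ = L_model_forward(test_x, pars_test)
p_chk = (probas_chk > 0.5).astype(float)
print('Accuracy:', np.mean(p_chk == test_y))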

4、Results Analysis

Find the test-set examples whose predictions disagree with the true labels, look at those images, and analyze the likely causes.

def print_mislabeled_images(X,y,p,classes):
    '''
    arguments:
    X: dataset
    y: true labels
    p: predictions
    classes: label names
 
    '''
    
    indexs = []
    for i in range(0, p.shape[1]):
        if p[0,i] != y[0,i]:
            indexs.append(i)
    num_images = len(indexs)      
    
#     a = p+y    # p+y can only be 0, 1 or 2; a value of 1 means p and y disagree, which locates the mislabeled images without a loop
#     mislabeled_indices = np.asarray(np.where(a == 1))
#     num_images = len(mislabeled_indices[0])

#     plt.rcParams['figure.figsize'] = (40.0, 40.0) # set default size of plots; whether outside or inside the loop, this shows all images in one row
#     plt.figure(figsize=(50,50))   # placed outside the loop, the images show in one row

    for i in range(num_images):
        index = indexs[i]
#         index = mislabeled_indices[1][i]
#         plt.rcParams['figure.figsize'] = (50.0, 50.0) # set default size of plots 
        plt.figure(figsize=(20,20))   # placed inside the loop, each image is drawn at the set size
        plt.subplot(2, num_images, i + 1)
        plt.imshow(X[index], interpolation='nearest')
        plt.axis('off')
        plt.title("Prediction: " + classes[int(p[0,index])].decode("utf-8") + " \n Class: " + classes[int(y[0,index])].decode("utf-8"))

a_test = np.array([1,1,2,3,1,3,4,6,1]).reshape(1,9)
print (a_test)
print (np.where(a_test == 1))   # returns the indices that satisfy the condition
test_1 = np.asarray(np.where(a_test == 1))
print (test_1)
print ('- '*20)
test_1 = np.array(np.where(a_test == 1))
print (test_1)
print_mislabeled_images(test_x_orig,test_y,pred_test,classes)

A few types of images the model tends to do poorly on include:

  • Cat body in an unusual position
  • Cat appears against a background of a similar color
  • Unusual cat color and species
  • Camera Angle
  • Brightness of the picture
  • Scale variation (cat is very large or small in image)

5、Test with your own image

# my_image = "my_image.jpg" # change this to the name of your image file 
my_image = "test_image1.jpg"
my_label_y = [1] # the true class of your image (1 -> cat, 0 -> non-cat)

fname = "images/" + my_image
# image = np.array(ndimage.imread(fname, flatten=False))
image = np.array(plt.imread(fname))
plt.figure(figsize=(20,12))
plt.imshow(image)

# my_image = scipy.misc.imresize(image, size=(num_px,num_px)).reshape((num_px*num_px*3,1))   # imresize was removed in SciPy 1.3
my_image = skimage.transform.resize(image, (num_px,num_px)).reshape((num_px*num_px*3,1))   # returns floats in [0, 1], matching the training normalization
my_predicted_image = predict(my_image, my_label_y, pars_test)
print ("y = " + str(np.squeeze(my_predicted_image)) + ", your L-layer model predicts a \"" + classes[int(np.squeeze(my_predicted_image)),].decode("utf-8") +  "\" picture.")