Course 4 - Convolutional Neural Networks - Week 1 Assignment

0. How CNN computations work, see: https://zhuanlan.zhihu.com/p/29119239

Explanation of the np.pad() function, see: https://blog.csdn.net/qq_37535492/article/details/88766347

That np.pad walkthrough pads a randomly generated batch of images with shape (m, n_h, n_w, n_c).

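A minimal sketch of what that padding looks like (array sizes chosen only for illustration):

import numpy as np

images = np.random.randn(4, 3, 3, 2)  # a batch with shape (m, n_h, n_w, n_c)
padded = np.pad(images, ((0, 0), (1, 1), (1, 1), (0, 0)),  # pad only the height and width axes
                'constant', constant_values=0)
print(padded.shape)  # (4, 5, 5, 2)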
1. Implementing the CNN forward (and backward) pass by hand with NumPy

#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import numpy as np
import matplotlib.pyplot as plt

# 1. Zero-padding function
def zero_pad(X,pad):
    X_paded=np.pad(X,((0,0), # no padding on the batch axis
                (pad,pad), # pad the height axis by `pad` on each side
                (pad,pad), # pad the width axis by `pad` on each side
                (0,0)), # no padding on the channel axis
             'constant',constant_values=0) # fill with the constant value 0
    return X_paded
# Test
# X=np.random.randn(4,3,3,2)
# print(X.shape)
# X_paded=zero_pad(X,2)
# print(X_paded.shape)
#
# # Plot the original and the padded image
# fig,axarr=plt.subplots(1,2)
# axarr[0].set_title('x')
# axarr[0].imshow(X[0,:,:,0])
# axarr[1].set_title('x_paded')
# axarr[1].imshow(X_paded[0,:,:,0])
# plt.show()

# 2. Single-step convolution - multiply one slice of x element-wise with W (the filter), add b, and return the resulting scalar
def conv_single_step(a_slice_prev,W,b):
    s=np.multiply(a_slice_prev,W) # element-wise product of the slice and the filter
    Z=np.sum(s)
    Z=Z+float(b) # add the bias once, as a scalar
    return Z
# Test
# a_slice_prev=np.random.randn(4,3,3)
# w=np.random.randn(4,3,3)
# b=np.random.randn(1) # or np.random.randn(1,1,1)
# z=conv_single_step(a_slice_prev,w,b)
# print(z)

# 3. Forward propagation for a convolutional layer
def conv_forward(A_prev,W,b,hparameters):
    (m,n_h_prev,n_w_prev,n_c_prev)=A_prev.shape
    (f,f,n_C_prev,n_C)=W.shape
    # Get the hyperparameters
    stride=hparameters['stride']
    pad=hparameters['pad']

    # Compute the output dimensions
    n_h=int((n_h_prev-f+2*pad)/stride)+1
    n_w=int((n_w_prev-f+2*pad)/stride)+1

    Z=np.zeros((m,n_h,n_w,n_C))

    A_prev_paded=zero_pad(A_prev,pad)

    for i in range(m):
        a_prev_pad=A_prev_paded[i]
        for h in range(n_h):
            for w in range(n_w):
                for c in range(n_C):
                    v_start=h*stride # vertical start position (n_h axis)
                    v_end=v_start+f
                    h_start=w*stride # horizontal start position (n_w axis)
                    h_end=h_start+f
                    # Take the slice
                    a_slice_prev=a_prev_pad[v_start:v_end,h_start:h_end,:]
                    # Convolve
                    Z[i,h,w,c]=conv_single_step(a_slice_prev,W[:,:,:,c],b[:,:,:,c])
    assert(Z.shape==(m,n_h,n_w,n_C))
    # Cache values for the backward pass
    cache=(A_prev,W,b,hparameters)
    return (Z,cache)
# Test
# np.random.seed(1)
# A_prev=np.random.randn(10,4,4,3)
# W=np.random.randn(2,2,3,8)
# b=np.random.randn(1,1,1,8)
# hparameters={
#     'pad':2,
#     'stride':1
# }
# Z,cache=conv_forward(A_prev,W,b,hparameters)
# print(Z.shape)
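# Sanity check on the expected shape (hand computation from the formula above):
# n_h = n_w = (4 - 2 + 2*2)/1 + 1 = 7, so Z.shape should be (10, 7, 7, 8).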

# 4. Forward propagation for a pooling layer
def pool_forward(A_prev,hparameters,mode='max'):
    (m,n_h_prev,n_w_prev,n_c_prev)=A_prev.shape
    f=hparameters['f']
    stride=hparameters['stride']

    # Compute the output dimensions
    n_h=int((n_h_prev-f)/stride)+1
    n_w=int((n_w_prev-f)/stride)+1
    n_c=n_c_prev # the number of channels is unchanged

    # Initialize the output matrix
    A=np.zeros((m,n_h,n_w,n_c))
    # Loop over the output positions
    for i in range(m):
        for h in range(n_h):
            for w in range(n_w):
                for c in range(n_c):
                    v_start=h*stride # vertical start position (n_h axis)
                    v_end=v_start+f
                    h_start=w*stride # horizontal start position (n_w axis)
                    h_end=h_start+f
                    # Take the slice for the current channel
                    a_slice=A_prev[i,v_start:v_end,h_start:h_end,c]
                    if mode=='max':
                        A[i,h,w,c]=np.max(a_slice)
                    elif mode=='average':
                        A[i,h,w,c]=np.mean(a_slice)
    assert (A.shape==(m,n_h,n_w,n_c))
    cache=(A_prev,hparameters)
    return (A,cache)
# Test
# np.random.seed(1)
# A_prev=np.random.randn(2,4,4,3)
# hparameters={
#     'f':4,
#     'stride':1
# }
# A,cache=pool_forward(A_prev,hparameters,'max')
# print("max="+str(A))
# A,cache=pool_forward(A_prev,hparameters,'average')
# print("average="+str(A))
# print(A.shape)
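# Sanity check on the expected shape (hand computation from the formula above):
# n_h = n_w = (4 - 4)/1 + 1 = 1, so A.shape should be (2, 1, 1, 3),
# i.e. one pooled value per example and channel.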

# 5. Backward propagation for a convolutional layer: dA, dW, db =========== for reference only
def conv_backward(dZ,cache):
    """
    Implements the backward propagation for a convolutional layer.

    Arguments:
        dZ - gradient of the cost with respect to the output Z of the conv layer, of shape (m, n_H, n_W, n_C)
        cache - values needed for the backward pass, one of the outputs of conv_forward()

    Returns:
        dA_prev - gradient with respect to the input of the conv layer (A_prev), of shape (m, n_H_prev, n_W_prev, n_C_prev)
        dW - gradient with respect to the weights of the conv layer, of shape (f, f, n_C_prev, n_C)
        db - gradient with respect to the biases of the conv layer, of shape (1, 1, 1, n_C)

    """
    # Retrieve the values from cache
    (A_prev, W, b, hparameters) = cache

    # Retrieve the dimensions of A_prev
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape

    # Retrieve the dimensions of dZ
    (m,n_H,n_W,n_C) = dZ.shape

    # Retrieve the dimensions of the weights
    (f, f, n_C_prev, n_C) = W.shape

    # Retrieve the hyperparameters
    pad = hparameters["pad"]
    stride = hparameters["stride"]

    # Initialize the gradients with the correct shapes
    dA_prev = np.zeros((m,n_H_prev,n_W_prev,n_C_prev))
    dW = np.zeros((f,f,n_C_prev,n_C))
    db = np.zeros((1,1,1,n_C))

    # We padded in the forward pass, so pad here as well to keep the shapes consistent
    A_prev_pad = zero_pad(A_prev,pad)
    dA_prev_pad = zero_pad(dA_prev,pad)

    # Process the data
    for i in range(m):
        # Select the i-th padded example (this drops the batch dimension)
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]

        for h in range(n_H):
            for w in range(n_W):
                for c in range(n_C):
                    # Locate the slice
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f

                    # Take the slice
                    a_slice = a_prev_pad[vert_start:vert_end,horiz_start:horiz_end,:]

                    # Accumulate the gradients
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end,:] += W[:,:,:,c] * dZ[i, h, w, c]
                    dW[:,:,:,c] += a_slice * dZ[i,h,w,c]
                    db[:,:,:,c] += dZ[i,h,w,c]
        # Set the i-th example's dA_prev by stripping the padding (assumes pad > 0)
        dA_prev[i,:,:,:] = da_prev_pad[pad:-pad, pad:-pad, :]

    # Verify the output shape
    assert(dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))

    return (dA_prev,dW,db)

# 6. Backward propagation for a pooling layer
# 6.1 Backward propagation for max pooling
def create_mask_from_window(x):
    """
    Creates a mask from the input matrix x that records the position of its maximum value.

    Arguments:
        x - a matrix of shape (f, f)

    Returns:
        mask - a matrix containing True at the position of the maximum value of x
    """
    mask = x == np.max(x)

    return mask
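# Quick usage sketch (illustrative values only):
# x = np.array([[1.0, 3.0],
#               [2.0, 0.0]])
# create_mask_from_window(x)
# -> [[False,  True],
#     [False, False]]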
# 6.2 Backward propagation for average pooling
def distribute_value(dz,shape):
    """
    Distributes a value evenly across a matrix of the given size.

    Arguments:
        dz - the input scalar
        shape - a tuple of two values, (n_H, n_W)

    Returns:
        a - a matrix of shape `shape` in which every entry holds the same distributed value

    """
    # Get the matrix dimensions
    (n_H , n_W) = shape

    # Compute the average
    average = dz / (n_H * n_W)

    # Fill the matrix
    a = np.ones(shape) * average

    return a
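# Quick usage sketch (illustrative values only):
# distribute_value(2.0, (2, 2))
# -> [[0.5, 0.5],
#     [0.5, 0.5]]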
# Backward propagation for a pooling layer
def pool_backward(dA,cache,mode = "max"):
    """
    Implements the backward propagation for a pooling layer.

    Arguments:
        dA - gradient of the cost with respect to the output of the pooling layer, same shape as that output
        cache - values stored during the forward pass of the pooling layer
        mode - pooling mode, either "max" or "average"

    Returns:
        dA_prev - gradient with respect to the input of the pooling layer, same shape as A_prev

    """
    # Retrieve the values from cache
    (A_prev , hparameters) = cache

    # Retrieve the hyperparameters
    f = hparameters["f"]
    stride = hparameters["stride"]

    # Retrieve the dimensions of A_prev and dA
    (m , n_H_prev , n_W_prev , n_C_prev) = A_prev.shape
    (m , n_H , n_W , n_C) = dA.shape

    # Initialize the output
    dA_prev = np.zeros_like(A_prev)

    # Process the data
    for i in range(m):
        a_prev = A_prev[i]
        for h in range(n_H):
            for w in range(n_W):
                for c in range(n_C):
                    # Locate the slice
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f

                    # Choose the backward computation depending on the mode
                    if mode == "max":
                        # Take the slice
                        a_prev_slice = a_prev[vert_start:vert_end,horiz_start:horiz_end,c]
                        # Create the mask
                        mask = create_mask_from_window(a_prev_slice)
                        # Compute dA_prev
                        dA_prev[i,vert_start:vert_end,horiz_start:horiz_end,c] += np.multiply(mask,dA[i,h,w,c])

                    elif mode == "average":
                        # Get the gradient value
                        da = dA[i,h,w,c]
                        # Define the filter size
                        shape = (f,f)
                        # Distribute it evenly
                        dA_prev[i,vert_start:vert_end, horiz_start:horiz_end ,c] += distribute_value(da,shape)
    # Verify the output shape
    assert(dA_prev.shape == A_prev.shape)

    return dA_prev
# Test
# np.random.seed(1)
# A_prev = np.random.randn(5, 5, 3, 2)
# hparameters = {"stride" : 1, "f": 2}
# A, cache = pool_forward(A_prev, hparameters)
# dA = np.random.randn(5, 4, 2, 2)
#
# dA_prev = pool_backward(dA, cache, mode = "max")
# print("mode = max")
# print('mean of dA = ', np.mean(dA))
# print('dA_prev[1,1] = ', dA_prev[1,1])
# print()
# dA_prev = pool_backward(dA, cache, mode = "average")
# print("mode = average")
# print('mean of dA = ', np.mean(dA))
# print('dA_prev[1,1] = ', dA_prev[1,1])

2. Implementing the CNN with TensorFlow

#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import cnn_utils
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops
'''
Model to build: conv -> relu -> maxpool -> conv -> relu -> maxpool -> fc
'''
X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = cnn_utils.load_dataset()
# index=6
# plt.imshow(X_train_orig[index])
# plt.show()
# print('y='+str(np.squeeze(Y_train_orig[:,index])))

X_train = X_train_orig/255.
X_test = X_test_orig/255.
Y_train = cnn_utils.convert_to_one_hot(Y_train_orig, 6).T
Y_test = cnn_utils.convert_to_one_hot(Y_test_orig, 6).T
# print ("number of training examples = " + str(X_train.shape[0]))
# print ("number of test examples = " + str(X_test.shape[0]))
# print ("X_train shape: " + str(X_train.shape))
# print ("Y_train shape: " + str(Y_train.shape))
# print ("X_test shape: " + str(X_test.shape))
# print ("Y_test shape: " + str(Y_test.shape))
conv_layers = {}

# 1. Create placeholders
def create_placeholders(n_h0,n_w0,n_c0,n_y):
    # The number of examples per mini-batch may vary, so use None for a variable batch size
    X=tf.placeholder(tf.float32,[None,n_h0,n_w0,n_c0])
    Y=tf.placeholder(tf.float32,[None,n_y])
    return X,Y
# Test
# X , Y = create_placeholders(64,64,3,6)
# print ("X = " + str(X))
# print ("Y = " + str(Y))

# 2. Initialize parameters
# Initialize W1 and W2 for the convolutional layers; the fully connected layer is initialized automatically by the framework
def initialize_parameters():
    tf.set_random_seed(1)
    W1=tf.get_variable('W1',[4,4,3,8],initializer=tf.contrib.layers.xavier_initializer(seed=0))
    W2=tf.get_variable('W2',[2,2,8,16],initializer=tf.contrib.layers.xavier_initializer(seed=0))
    parameters={
        'W1':W1,
        'W2':W2
    }
    return parameters
# Test
# tf.reset_default_graph()
# with tf.Session() as sess_test:
#     parameters=initialize_parameters()
#     init=tf.global_variables_initializer()
#     sess_test.run(init)
#     print("w1="+str(parameters['W1'].eval()[1,1,1]))
#     print("w2="+str(parameters['W2'].eval()[1,1,1]))
#
#     sess_test.close()

# 3. Forward propagation
'''
Model: CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FULLYCONNECTED
Fully connected layer (FC): use a fully connected layer without a non-linear activation function.
Do not apply softmax here; the output layer has 6 neurons, and softmax is applied later.
In TensorFlow, softmax and the cost function are combined into a single function,
which is called when computing the cost.
'''
def forward_propagation(X,parameters):
    W1=parameters['W1']
    W2=parameters['W2']

    # Conv2d: stride 1, padding "SAME"
    Z1=tf.nn.conv2d(X,W1,strides=[1,1,1,1],padding='SAME')

    # RELU
    A1=tf.nn.relu(Z1)

    # Max pool: window 8x8, stride 8x8, padding "SAME"
    P1=tf.nn.max_pool(A1,ksize=[1,8,8,1],strides=[1,8,8,1],padding='SAME')

    # Conv2d: stride 1, padding "SAME"
    Z2=tf.nn.conv2d(P1,W2,strides=[1,1,1,1],padding='SAME')

    # RELU
    A2=tf.nn.relu(Z2)

    # Max pool: window 4x4, stride 4x4, padding "SAME"
    P2=tf.nn.max_pool(A2,ksize=[1,4,4,1],strides=[1,4,4,1],padding='SAME')

    # Flatten the previous output
    P=tf.contrib.layers.flatten(P2)

    # Fully connected layer (FC): no non-linear activation function
    Z3=tf.contrib.layers.fully_connected(P,6,activation_fn=None)

    return Z3
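# Shape trace for a (m, 64, 64, 3) input (follows from the strides and padding above):
#   conv W1, stride 1, SAME       -> (m, 64, 64, 8)
#   max pool 8x8, stride 8, SAME  -> (m, 8, 8, 8)
#   conv W2, stride 1, SAME       -> (m, 8, 8, 16)
#   max pool 4x4, stride 4, SAME  -> (m, 2, 2, 16)
#   flatten                       -> (m, 64)
#   fully connected               -> (m, 6)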
# Test
# tf.set_random_seed(1)
# tf.reset_default_graph()
# with tf.Session() as sess_test:
#     X,Y=create_placeholders(64,64,3,6)
#     parameters=initialize_parameters()
#     Z3=forward_propagation(X,parameters)
#
#     init=tf.global_variables_initializer()
#     sess_test.run(init)
#     a=sess_test.run(Z3,{X:np.random.randn(2,64,64,3),Y:np.random.randn(2,6)})
#     print('z3='+str(a))
#     sess_test.close()

# 4. Compute the cost
def compute_cost(Z3,Y):
    cost=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=Z3,labels=Y))
    return cost
# Test
# tf.reset_default_graph()
#
# with tf.Session() as sess_test:
#     np.random.seed(1)
#     X,Y = create_placeholders(64,64,3,6)
#     parameters = initialize_parameters()
#     Z3 = forward_propagation(X,parameters)
#     cost = compute_cost(Z3,Y)
#
#     init = tf.global_variables_initializer()
#     sess_test.run(init)
#     a = sess_test.run(cost,{X: np.random.randn(4,64,64,3), Y: np.random.randn(4,6)})
#     print("cost = " + str(a))
#
#     sess_test.close()

# 5. Build the model
def model(X_train,Y_train,X_test,Y_test,learning_rate=0.009,num_epochs=100,minibatch_size=64,print_cost=True,isPlot=True):
    ops.reset_default_graph() # allows re-running the model without overwriting tf variables
    tf.set_random_seed(1)
    seed=3
    (m,n_h,n_w,n_c)=X_train.shape
    n_y=Y_train.shape[1]

    costs=[]

    # Create placeholders with the current dimensions
    X,Y=create_placeholders(n_h,n_w,n_c,n_y)

    # Initialize parameters
    parameters=initialize_parameters()

    # Forward propagation
    Z3=forward_propagation(X,parameters)

    # Compute the cost
    cost=compute_cost(Z3,Y)

    # Backward propagation: just choose an optimizer, TensorFlow handles the gradients
    optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Initialize global variables
    init=tf.global_variables_initializer()

    # Run the session
    with tf.Session() as sess:
        # Initialize the parameters
        sess.run(init)
        # Loop over the epochs
        for epoch in range(num_epochs):
            minibatch_cost=0
            num_minibatches=int(m/minibatch_size) # number of mini-batches
            seed=seed+1
            minibatches=cnn_utils.random_mini_batches(X_train,Y_train,minibatch_size,seed)
            # Loop over the mini-batches
            for minibatch in minibatches:
                (minibatch_x,minibatch_y)=minibatch
                # Minimize the cost on the current mini-batch
                _,temp_cost=sess.run([optimizer,cost],feed_dict={X:minibatch_x,Y:minibatch_y})
                # Accumulate the mini-batch cost -- note the division by num_minibatches
                minibatch_cost+=temp_cost/num_minibatches
            if print_cost:
                if epoch%5==0:
                    print("Cost after epoch "+str(epoch)+": "+str(minibatch_cost))
            if epoch%10==0:
                costs.append(minibatch_cost)
        # Training finished, plot the cost curve
        if isPlot:
            plt.plot(np.squeeze(costs))
            plt.xlabel('iterations (per tens)')
            plt.ylabel('cost')
            plt.title('learning_rate: '+str(learning_rate))
            plt.show()
        # Evaluate the predictions
        # Compute the predicted class for each example
        predict_op=tf.argmax(Z3,1)
        correct_prediction=tf.equal(predict_op,tf.argmax(Y,1))

        # Build the accuracy op
        accuracy=tf.reduce_mean(tf.cast(correct_prediction,'float'))
        print('accuracy op: '+str(accuracy))

        # Evaluate accuracy on the training and test sets
        train_accuracy=accuracy.eval({X:X_train,Y:Y_train})
        test_accuracy=accuracy.eval({X:X_test,Y:Y_test})

        print("Train accuracy: " + str(train_accuracy))
        print("Test accuracy: " + str(test_accuracy))

        return (train_accuracy, test_accuracy, parameters)
# Launch the model
train_accuracy, test_accuracy, parameters=model(X_train,Y_train,X_test,Y_test,num_epochs=150)

# Prediction function (demo only: it re-initializes the variables and feeds random input,
# so it does not actually use the trained weights)
def predict(parameters):
    X = tf.placeholder(tf.float32, [None,64,64,3])
    Z3=forward_propagation(X,parameters)

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        a=sess.run(Z3,feed_dict={X:np.random.randn(1,64,64,3)})
        b=np.squeeze(np.argmax(a, axis=1))
    return b
# Test
# print("Prediction: "+str(predict(parameters)))






3. The required cnn_utils helper module

#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import math

import h5py
import numpy as np

def load_dataset():
    train_dataset = h5py.File('datasets/train_signs.h5', 'r')
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])

    test_dataset = h5py.File('datasets/test_signs.h5', 'r')
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])

    classes = np.array(test_dataset["list_classes"][:])

    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


def convert_to_one_hot(Y,c):
    Y1=np.eye(c)
    Y2=Y.reshape(-1)
    Y=Y1[Y2].T
    return Y
# Y=np.array([0,1,2,1,2])
# Y=convert_to_one_hot(Y,3)
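# Expected result (columns are the one-hot encodings of 0,1,2,1,2):
# [[1. 0. 0. 0. 0.]
#  [0. 1. 0. 1. 0.]
#  [0. 0. 1. 0. 1.]]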

def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
    m = X.shape[0]
    np.random.seed(seed)
    mini_batches = []

    # 1. Shuffle
    permutation = list(np.random.permutation(m))
    shuffle_X = X[permutation,:,:,:]
    shuffle_Y = Y[permutation,:]

    # 2. Partition
    num_complete_minibatches = math.floor(m / mini_batch_size)
    for k in range(0, num_complete_minibatches):
        mini_batch_X = shuffle_X[k * mini_batch_size: k * mini_batch_size + mini_batch_size,:,:,:]
        mini_batch_Y = shuffle_Y[k * mini_batch_size: k * mini_batch_size + mini_batch_size,:]
        mini_batches.append((mini_batch_X, mini_batch_Y))

    # Handle the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = shuffle_X[num_complete_minibatches * mini_batch_size: m,:,:,:]
        mini_batch_Y = shuffle_Y[num_complete_minibatches * mini_batch_size: m,:]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)

    return mini_batches
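# Usage sketch (shapes assume the SIGNS data prepared in section 2 above):
# minibatches = random_mini_batches(X_train, Y_train, 64, seed=0)
# each full mini-batch is a tuple (X, Y) with X of shape (64, 64, 64, 3) and Y of shape (64, 6)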

