Course 2 - Improving Deep Neural Networks - Week 3 Assignment - Introduction to TensorFlow

1. Getting familiar with basic TensorFlow functions

#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import numpy as np
import h5py
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops

np.random.seed(1)
'''
Exercises implementing basic TensorFlow functions
'''
# 1. Compute a squared-difference loss
# 1.1 Define two constants
def calculate_loss():
    y_hat = tf.constant(36, name="y_hat")
    y = tf.constant(39, name="y")
    # 1.2 Define the loss variable -- this builds the computation graph
    loss = tf.Variable((y - y_hat) ** 2, name="loss")
    # 1.3 Define an initializer
    init = tf.global_variables_initializer()
    # 1.4 Create a session; run the initializer (so the loss variable is
    # initialized and ready to be computed), then run the variable to compute its value
    with tf.Session() as session:
        # A variable must be initialized and run within a session before its value can be read
        session.run(init)
        print(session.run(loss))
def calculate_demo():
    a = tf.constant(2)
    b = tf.constant(10)
    c = tf.multiply(a, b)
    # This yields a Tensor object, not its value
    print(c)
    sess = tf.Session()
    # Running the graph yields the value
    print(sess.run(c))
    sess.close()
def placeholders_demo():
    # A placeholder's value is only supplied later, via a feed_dict dictionary
    sess = tf.Session()
    x = tf.placeholder(tf.int64, name="x")
    print(sess.run(2 * x, feed_dict={x: 3}))
    sess.close()
# placeholders_demo()
# 1.2 Define a linear function
def linear_function():
    X=tf.constant(np.random.randn(3,1),name="X")
    W=tf.constant(np.random.randn(4,3),name="W")
    b=tf.constant(np.random.randn(4,1),name="b")

    Y=tf.matmul(W,X)+b

    sess=tf.Session()
    result=sess.run(Y)
    sess.close()
    return result
# print(linear_function())
# 1.3 Define a sigmoid function
def sigmoid(z):
    x=tf.placeholder(tf.float32,name="x")
    sigmoid=tf.sigmoid(x)
    with tf.Session() as sess:
        result=sess.run(sigmoid,feed_dict={x:z})
    return result
# print(sigmoid(0))
# 1.4 Compute the cross-entropy cost
# def calculate_cross_entropy():
    # The function below computes the sigmoid and the cross-entropy in a single call
    # tf.nn.sigmoid_cross_entropy_with_logits(logits=, labels=)
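# A minimal runnable sketch (assuming the TF1 API used throughout this post):
# tf.nn.sigmoid_cross_entropy_with_logits applies the sigmoid and the
# cross-entropy together, which is more numerically stable than computing
# the two steps separately.
def cross_entropy_demo():
    logits = tf.placeholder(tf.float32, name="logits")
    labels = tf.placeholder(tf.float32, name="labels")
    cost = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels)
    with tf.Session() as sess:
        print(sess.run(cost, feed_dict={logits: [0.2, 0.4, 0.7, 0.9],
                                        labels: [0.0, 0.0, 1.0, 1.0]}))
# cross_entropy_demo()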
# 1.5 One-hot encoding
def one_hot_matrix(labels, C):
    '''
    Create a matrix in which row i corresponds to class i and column j
    corresponds to training example j
    :param labels: vector of labels
    :param C: number of classes
    :return: the one-hot matrix
    '''
    # Define C as a constant
    C = tf.constant(C, name="C")
    one_hot_matrix = tf.one_hot(indices=labels, depth=C, axis=0)
    with tf.Session() as sess:
        one_hot = sess.run(one_hot_matrix)
    return one_hot
# Test
# labels = np.array([1,2,3,0,2,1])
# one_hot = one_hot_matrix(labels,C=4)
# print(str(one_hot))
# 1.6 Initializing to zeros and ones
def ones(shape):
    ones=tf.ones(shape)
    with tf.Session() as sess:
        ones=sess.run(ones)
    return ones
# Test
# print("ones="+str(ones((2,2))))

2 - Building a neural network with TensorFlow

We will use TensorFlow to build a neural network. Keep in mind that implementing the model takes two steps (a minimal sketch follows this list):
1. Create the computation graph
2. Run the computation graph
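
A minimal sketch of this two-step pattern (using the same TF1-style API as the rest of this post): building the graph only creates Tensor nodes; session.run is what actually computes values.

import tensorflow as tf

# Step 1: create the computation graph -- nothing is computed yet
a = tf.constant(3.0)
b = tf.constant(4.0)
c = a * b  # c is a Tensor node describing the multiplication, not the value 12.0

# Step 2: run the graph inside a session to obtain actual values
with tf.Session() as sess:
    print(sess.run(c))  # prints 12.0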

#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import datetime

import numpy as np
from tensorflow.python.framework import ops
import tf_utils
import matplotlib.pyplot as plt
import tensorflow as tf

# 1. Load the dataset; X_train_orig has shape (1080, 64, 64, 3)
X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = tf_utils.load_dataset()

# Inspect a sample
# index = 11
# plt.imshow(X_train_orig[index])
# print("Y=" + str(np.squeeze(Y_train_orig[:, index])))
# plt.show()

# 2. Preprocess the data
# 2.1 Flatten and normalize the data; X_train_flatten has shape (12288, 1080)
X_train_flatten = X_train_orig.reshape(X_train_orig.shape[0], -1).T
X_test_flatten = X_test_orig.reshape(X_test_orig.shape[0], -1).T

X_train = X_train_flatten / 255
X_test = X_test_flatten / 255

# 2.2 Convert the labels to one-hot matrices
Y_train = tf_utils.convert_to_one_hot(Y_train_orig, 6)
Y_test = tf_utils.convert_to_one_hot(Y_test_orig, 6)


# print("训练集样本数 = " + str(X_train.shape[1]))
# print("测试集样本数 = " + str(X_test.shape[1]))
# print("X_train.shape: " + str(X_train.shape))
# print("Y_train.shape: " + str(Y_train.shape))
# print("X_test.shape: " + str(X_test.shape))
# print("Y_test.shape: " + str(Y_test.shape))
# 3.初始化工作
# 3.1创建占位符
def create_placeholders(n_x, n_y):
    '''
    :param n_x: a scalar, the size of an image vector (64 * 64 * 3 = 12288)
    :param n_y: a scalar, the number of classes (0 through 5, so n_y = 6)
    :return:
      X - a placeholder for the data input, of shape [n_x, None], dtype "float"
      Y - a placeholder for the input labels, of shape [n_y, None], dtype "float"
    '''
    X = tf.placeholder(tf.float32, [n_x, None], name="X")
    Y = tf.placeholder(tf.float32, [n_y, None], name="Y")
    return X, Y


# X, Y = create_placeholders(12288, 6)
# print("X = " + str(X))
# print("Y = " + str(Y))
# 3.2 Initialize the parameters
def initialize_parameters():
    """
        初始化神经网络的参数,参数的维度如下:
            W1 : [25, 12288]
            b1 : [25, 1]
            W2 : [12, 25]
            b2 : [12, 1]
            W3 : [6, 12]
            b3 : [6, 1]

        返回:
            parameters - 包含了W和b的字典
            tf.Variable() 每次都在创建新对象,对于get_variable()来说,对于已经创建的变量对象,就把那个对象返回,如果没有创建变量对象的话,就创建一个新的。
        """
    # Set the random seed
    tf.set_random_seed(1)
    W1 = tf.get_variable("W1", [25, 12288], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b1 = tf.get_variable("b1", [25, 1], initializer=tf.zeros_initializer())

    W2 = tf.get_variable("W2", [12, 25], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b2 = tf.get_variable("b2", [12, 1], initializer=tf.zeros_initializer())

    W3 = tf.get_variable("W3", [6, 12], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b3 = tf.get_variable("b3", [6, 1], initializer=tf.zeros_initializer())

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2,
                  "W3": W3,
                  "b3": b3}

    return parameters
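# A minimal sketch (assuming TF1 variable scopes) of the tf.get_variable
# reuse behavior noted in the docstring above:
# with tf.variable_scope("demo"):
#     v1 = tf.get_variable("v", [1])
# with tf.variable_scope("demo", reuse=True):
#     v2 = tf.get_variable("v", [1])
# print(v1 is v2)  # True -- get_variable returned the existing object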


# Test
# tf.reset_default_graph()  # clears the default graph stack and resets the global default graph
#
# with tf.Session() as sess:
#     parameters = initialize_parameters()
#     print("W1 = " + str(parameters["W1"]))
#     print("b1 = " + str(parameters["b1"]))
#     print("W2 = " + str(parameters["W2"]))
#     print("b2 = " + str(parameters["b2"]))

# 4. Forward propagation
def forward_propagation(X, parameters):
    """
    实现一个模型的前向传播,模型结构为LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SOFTMAX

    参数:
        X - 输入数据的占位符,维度为(输入节点数量,样本数量)
        parameters - 包含了W和b的参数的字典

    返回:
        Z3 - 最后一个LINEAR节点的输出

    """

    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    Z1 = tf.add(tf.matmul(W1, X), b1)   # Z1 = np.dot(W1, X) + b1
    # Z1 = tf.matmul(W1, X) + b1        # equivalent formulation
    A1 = tf.nn.relu(Z1)                 # A1 = relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)  # Z2 = np.dot(W2, A1) + b2
    A2 = tf.nn.relu(Z2)                 # A2 = relu(Z2)
    Z3 = tf.add(tf.matmul(W3, A2), b3)  # Z3 = np.dot(W3, A2) + b3

    return Z3


# Test
# tf.reset_default_graph()
# with tf.Session() as sess:
#     X,Y=create_placeholders(12288,6)
#     parameters=initialize_parameters()
#     Z3=forward_propagation(X,parameters)
#     print("Z3="+str(Z3))
# 5. Compute the cost
def compute_cost(Z3, Y):
    # tf.nn.softmax_cross_entropy_with_logits expects tensors of shape
    # (number of examples, number of classes), so transpose Z3 and Y,
    # which are laid out as (classes, examples)
    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)

    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))

    return cost


# Test
# tf.reset_default_graph()  # clears the default graph stack and resets the global default graph
# with tf.Session() as sess:
#     X,Y = create_placeholders(12288,6)
#     parameters = initialize_parameters()
#     Z3 = forward_propagation(X,parameters)
#     cost=compute_cost(Z3,Y)
#     print("cost = " + str(cost))

# 6. Backpropagation and parameter updates
'''
All of backpropagation and the parameter update are handled in a single line
of code. After computing the cost, create an "optimizer" object. When running
the tf.Session, this object must be run together with the cost; when called,
it optimizes the given cost using the chosen method and learning rate.
For example, for gradient descent:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
During optimization:
_, c = sess.run([optimizer, cost], feed_dict={X: mini_batch_X, Y: mini_batch_Y})
Here _ is a throwaway variable for a value we will not need later: it holds
the evaluated optimizer (which we do not use), while c receives the value of
the cost. A runnable sketch follows.
'''
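# A minimal runnable sketch of the pattern above (assuming the TF1 API used
# throughout this post): minimize f(w) = (w - 5)^2 with gradient descent;
# each call to sess.run([optimizer, cost]) performs one backprop/update step.
def optimizer_demo():
    w = tf.Variable(0.0, name="w")
    cost = (w - 5) ** 2
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(100):
            _, c = sess.run([optimizer, cost])
        print(sess.run(w))  # converges close to 5.0
# optimizer_demo()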


# 7. Build the model
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001, num_epochs=1500, minibatch_size=32, print_cost=True,
          is_plot=True):
    ops.reset_default_graph()  # allows rerunning the model without overwriting tf variables
    tf.set_random_seed(1)
    seed = 3
    (n_x, m) = X_train.shape
    n_y = Y_train.shape[0]
    costs = []

    # Create placeholders for X and Y
    X, Y = create_placeholders(n_x, n_y)

    # Initialize the parameters
    parameters = initialize_parameters()

    # Forward propagation -- note that this uses the placeholder X
    Z3 = forward_propagation(X, parameters)

    # Compute the cost -- note that this uses the placeholder Y
    cost = compute_cost(Z3, Y)

    # Backpropagation, using the Adam optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Initialize all variables
    init = tf.global_variables_initializer()

    # Start the session and run the computation
    with tf.Session() as sess:
        # Run the initialization
        sess.run(init)

        # Main training loop
        for epoch in range(num_epochs):
            epoch_cost = 0
            num_minibatches = int(m / minibatch_size)
            seed = seed + 1
            mini_batches = tf_utils.random_mini_batches(X_train, Y_train, minibatch_size, seed)

            for minibatch in mini_batches:
                # Take one mini-batch
                (minibatch_X, minibatch_Y) = minibatch
                # With the data ready, run the session
                _, minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})

                # Accumulate this mini-batch's share of the epoch's cost
                epoch_cost = epoch_cost + minibatch_cost / num_minibatches

            # Record and print the cost
            ## Record the cost every 5 epochs
            if epoch % 5 == 0:
                costs.append(epoch_cost)
                # Print every 100 epochs if requested
                if print_cost and epoch % 100 == 0:
                    print("epoch = " + str(epoch) + "    epoch_cost = " + str(epoch_cost))

        # Optionally plot the learning curve
        if is_plot:
            plt.plot(np.squeeze(costs))
            plt.ylabel('cost')
            plt.xlabel('epochs (per 5)')
            plt.title("Learning rate = " + str(learning_rate))
            plt.show()

        # Save the learned parameters
        parameters = sess.run(parameters)
        print("Parameters have been saved from the session.")

        # Compute the current predictions (tf.argmax defaults to axis 0, the class dimension)
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))

        # Compute the accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Training-set accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test-set accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

        return parameters


# Test
# Start time
start_time = datetime.datetime.now()
# Train
parameters = model(X_train, Y_train, X_test, Y_test)
# End time
end_time = datetime.datetime.now()
# Elapsed time
print("Elapsed time: " + str(end_time - start_time))
# To test your own images: crop them to a 1:1 aspect ratio, scale large images
# down to 64x64 (e.g. with Format Factory), and convert jpg to png, because
# mpimg can only read png images
# import matplotlib.image as mpimg  # mpimg is used to read images
#
# my_image1 = "5.png"                                            # image file name
# fileName1 = "images/fingers/" + my_image1                      # image path
# image1 = mpimg.imread(fileName1)                               # read the image
# plt.imshow(image1)                                             # display the image
# my_image1 = image1.reshape(1, 64 * 64 * 3).T                   # reshape the image
# my_image_prediction = tf_utils.predict(my_image1, parameters)  # run the prediction
# print("Prediction: y = " + str(np.squeeze(my_image_prediction)))

3. tf_utils code

#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import math
import tensorflow as tf
import h5py
import numpy as np


def load_dataset():
    train_dataset = h5py.File('datasets/train_signs.h5', 'r')
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])

    test_dataset = h5py.File('datasets/test_signs.h5', 'r')
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])

    classes = np.array(test_dataset["list_classes"][:])

    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


def convert_to_one_hot(Y, C):
    # Row i of np.eye(C) is the one-hot vector for class i; indexing with the
    # flattened label vector picks one row per example, and .T gives shape (C, m)
    Y = np.eye(C)[Y.reshape(-1)].T
    return Y


# convert_to_one_hot(np.array([0,1,2,3]),4)

def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
    m = X.shape[1]
    np.random.seed(seed)
    mini_batches = []

    # 1.shuffle
    permutation = list(np.random.permutation(m))
    shuffle_X = X[:, permutation]
    shuffle_Y = Y[:, permutation].reshape((Y.shape[0], m))

    # 2. partition
    num_complete_minibatches = math.floor(m / mini_batch_size)
    for k in range(0, num_complete_minibatches):
        mini_batch_X = shuffle_X[:, k * mini_batch_size: k * mini_batch_size + mini_batch_size]
        mini_batch_Y = shuffle_Y[:, k * mini_batch_size: k * mini_batch_size + mini_batch_size]
        mini_batches.append((mini_batch_X, mini_batch_Y))
    # Handle the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = shuffle_X[:, num_complete_minibatches * mini_batch_size: m]
        mini_batch_Y = shuffle_Y[:, num_complete_minibatches * mini_batch_size: m]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)

    return mini_batches
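# Quick sanity check (a sketch with hypothetical demo data, not part of the
# assignment): with m = 100 examples and mini_batch_size = 64 we expect two
# mini-batches, of 64 and 36 examples respectively.
# X_demo = np.random.randn(5, 100)
# Y_demo = np.random.randn(1, 100)
# batches = random_mini_batches(X_demo, Y_demo, mini_batch_size=64, seed=0)
# print([batch[0].shape[1] for batch in batches])   # [64, 36]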


def predict(X, parameters):
    W1 = tf.convert_to_tensor(parameters["W1"])
    b1 = tf.convert_to_tensor(parameters["b1"])
    W2 = tf.convert_to_tensor(parameters["W2"])
    b2 = tf.convert_to_tensor(parameters["b2"])
    W3 = tf.convert_to_tensor(parameters["W3"])
    b3 = tf.convert_to_tensor(parameters["b3"])

    params = {"W1": W1,
              "b1": b1,
              "W2": W2,
              "b2": b2,
              "W3": W3,
              "b3": b3}

    x = tf.placeholder("float", [12288, 1])

    z3 = forward_propagation_for_predict(x, params)
    p = tf.argmax(z3)

    sess = tf.Session()
    prediction = sess.run(p, feed_dict={x: X})

    return prediction


def forward_propagation_for_predict(X, parameters):
    """
    Implements the forward propagation for the model: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SOFTMAX

    Arguments:
    X -- input dataset placeholder, of shape (input size, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3"
                  the shapes are given in initialize_parameters
    Returns:
    Z3 -- the output of the last LINEAR unit
    """

    # Retrieve the parameters from the dictionary "parameters"
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']
    # Numpy Equivalents:
    Z1 = tf.add(tf.matmul(W1, X), b1)   # Z1 = np.dot(W1, X) + b1
    A1 = tf.nn.relu(Z1)                 # A1 = relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)  # Z2 = np.dot(W2, A1) + b2
    A2 = tf.nn.relu(Z2)                 # A2 = relu(Z2)
    Z3 = tf.add(tf.matmul(W3, A2), b3)  # Z3 = np.dot(W3, A2) + b3

    return Z3

Note: TensorFlow defines the computation graph up front. Before run is called, every step merely constructs the graph; the actual computation happens only when run is executed.
