1. Getting familiar with basic TensorFlow functions
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import numpy as np
import h5py
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops
import time
np.random.seed(1)
'''
Practice with the basic TensorFlow functions
'''
# 1. Define a squared-difference function
# 1.1 Define two constants
def caculate_loss():
    y_hat = tf.constant(36, name="y_hat")
    y = tf.constant(39, name="y")
    # 1.2 Define the loss variable -- this builds the computation graph
    loss = tf.Variable((y - y_hat) ** 2, name="loss")
    # 1.3 Define an initializer
    init = tf.global_variables_initializer()
    # 1.4 Create a session, run the initializer (the loss variable is initialized and
    # ready to be computed), then run the variable to compute its value
    with tf.Session() as session:
        # The variable only has a value after the initializer is run inside a session
        session.run(init)
        print(session.run(loss))
def caculcate_demo():
    a = tf.constant(2)
    b = tf.constant(10)
    c = tf.multiply(a, b)
    # c is a Tensor object; printing it shows the graph node, not its value
    print(c)
    sess = tf.Session()
    # Run the graph in a session to get the actual value
    print(sess.run(c))
    sess.close()
def placeholders_demo():
    # A placeholder's value is supplied later, via a feed_dict dictionary
    sess = tf.Session()
    x = tf.placeholder(tf.int64, name="x")
    print(sess.run(2 * x, feed_dict={x: 3}))
    sess.close()
# placeholders_demo()
# 1.2 Define a linear function Y = WX + b
def linear_function():
    X = tf.constant(np.random.randn(3, 1), name="X")
    W = tf.constant(np.random.randn(4, 3), name="W")
    b = tf.constant(np.random.randn(4, 1), name="b")
    Y = tf.matmul(W, X) + b
    sess = tf.Session()
    result = sess.run(Y)
    sess.close()
    return result
# print(linear_function())
# 1.3 Define the sigmoid function
def sigmoid(z):
    x = tf.placeholder(tf.float32, name="x")
    sigmoid = tf.sigmoid(x)
    with tf.Session() as sess:
        result = sess.run(sigmoid, feed_dict={x: z})
    return result
# print(sigmoid(0))
# 1.4 Compute the cross-entropy cost
# def caculate_cross_entropy():
# The function below computes the sigmoid and the cross-entropy in a single call
# tf.nn.sigmoid_cross_entropy_with_logits(logits=,labels=)
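# A minimal sketch of that call (my own addition, following the placeholder pattern used above;
# the names z and y are illustrative choices):
def caculate_cross_entropy(logits, labels):
    z = tf.placeholder(tf.float32, name="z")
    y = tf.placeholder(tf.float32, name="y")
    cost = tf.nn.sigmoid_cross_entropy_with_logits(logits=z, labels=y)
    with tf.Session() as sess:
        result = sess.run(cost, feed_dict={z: logits, y: labels})
    return result
# print(caculate_cross_entropy(np.array([0.2, 0.4, 0.7, 0.9]), np.array([0., 0., 1., 1.])))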
# 1.5 One-hot encoding
def one_hot_matrix(labels, C):
    '''
    Creates a matrix where row i corresponds to the i-th class and column j to the j-th training example
    :param labels: vector of labels
    :param C: number of classes
    :return: one-hot matrix
    '''
    # Define C as a constant
    C = tf.constant(C, name="C")
    one_hot_matrix = tf.one_hot(indices=labels, depth=C, axis=0)
    with tf.Session() as sess:
        one_hot = sess.run(one_hot_matrix)
    return one_hot
# Test
# labels = np.array([1,2,3,0,2,1])
# one_hot = one_hot_matrix(labels,C=4)
# print(str(one_hot))
# 1.6 Initialize with zeros and ones
def ones(shape):
    ones = tf.ones(shape)
    with tf.Session() as sess:
        ones = sess.run(ones)
    return ones
# Test
# print("ones="+str(ones((2,2))))
2 - Building a neural network with TensorFlow
We will now build a neural network with TensorFlow. Keep in mind that implementing a model takes two steps:
1. Create the computation graph
2. Run the computation graph
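As a minimal illustration of this two-step pattern (a sketch of my own, separate from the model code below): nothing is computed when the graph is defined; values only appear when the graph is run inside a session.
import tensorflow as tf
# Step 1: create the computation graph
a = tf.constant(3.0)
b = tf.constant(4.0)
c = a * b
# Step 2: run the graph inside a session to obtain concrete values
with tf.Session() as sess:
    print(sess.run(c))  # 12.0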
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# from datetime import time
import time
import datetime
import numpy as np
from tensorflow.python.framework import ops
# import bb as tf_utils
import tf_utils
import matplotlib.pyplot as plt
import tensorflow as tf
# 1. Load the dataset; X_train_orig has shape (1080, 64, 64, 3)
X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = tf_utils.load_dataset()
# Inspect one example
# index=11
# plt.imshow(X_train_orig[index])
# print("Y="+str(np.squeeze(Y_train_orig[:,index])))
# plt.show()
# 2. Data preprocessing
# 2.1 Flatten and normalize the data; X_train_flatten has shape (12288, 1080)
X_train_flatten = X_train_orig.reshape(X_train_orig.shape[0], -1).T
X_test_flatten = X_test_orig.reshape(X_test_orig.shape[0], -1).T
X_train = X_train_flatten / 255
X_test = X_test_flatten / 255
# 2.2 Convert the labels to one-hot matrices
Y_train = tf_utils.convert_to_one_hot(Y_train_orig, 6)
Y_test = tf_utils.convert_to_one_hot(Y_test_orig, 6)
# print("训练集样本数 = " + str(X_train.shape[1]))
# print("测试集样本数 = " + str(X_test.shape[1]))
# print("X_train.shape: " + str(X_train.shape))
# print("Y_train.shape: " + str(Y_train.shape))
# print("X_test.shape: " + str(X_test.shape))
# print("Y_test.shape: " + str(Y_test.shape))
# 3. Setup
# 3.1 Create placeholders
def create_placeholders(n_x, n_y):
    '''
    :param n_x: an integer, size of an image vector (64*64*3 = 12288)
    :param n_y: an integer, number of classes (0 to 5, so n_y = 6)
    :return:
        X - placeholder for the data input, of shape [n_x, None], dtype "float"
        Y - placeholder for the input labels, of shape [n_y, None], dtype "float"
    '''
    X = tf.placeholder(tf.float32, [n_x, None], name="X")
    Y = tf.placeholder(tf.float32, [n_y, None], name="Y")
    return X, Y
# X, Y = create_placeholders(12288, 6)
# print("X = " + str(X))
# print("Y = " + str(Y))
# 3.2 Initialize the parameters
def initialize_parameters():
    """
    Initializes the parameters of the network, with the following shapes:
        W1 : [25, 12288]
        b1 : [25, 1]
        W2 : [12, 25]
        b2 : [12, 1]
        W3 : [6, 12]
        b3 : [6, 1]
    Returns:
        parameters - a dictionary containing the W and b tensors
    Note: tf.Variable() always creates a new object, whereas tf.get_variable() returns an
    already-created variable with that name if one exists, and creates a new one otherwise.
    """
    # Set the random seed
    tf.set_random_seed(1)
    W1 = tf.get_variable("W1", [25, 12288], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b1 = tf.get_variable("b1", [25, 1], initializer=tf.zeros_initializer())
    W2 = tf.get_variable("W2", [12, 25], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b2 = tf.get_variable("b2", [12, 1], initializer=tf.zeros_initializer())
    W3 = tf.get_variable("W3", [6, 12], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b3 = tf.get_variable("b3", [6, 1], initializer=tf.zeros_initializer())
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2,
                  "W3": W3,
                  "b3": b3}
    return parameters
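# A small sketch (my own addition) of the tf.Variable() vs. tf.get_variable() behaviour described
# in the docstring above; the scope name "demo" is just an illustrative choice.
def get_variable_reuse_demo():
    with tf.variable_scope("demo"):
        v1 = tf.get_variable("v", [1], initializer=tf.zeros_initializer())
    with tf.variable_scope("demo", reuse=True):
        v2 = tf.get_variable("v")    # returns the existing variable instead of creating a new one
    w1 = tf.Variable(0.0, name="w")
    w2 = tf.Variable(0.0, name="w")  # a second, distinct variable despite the same name
    print(v1 is v2, w1 is w2)        # True False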
# Test
# tf.reset_default_graph()  # clears the default graph stack and resets the global default graph
#
# with tf.Session() as sess:
#     parameters = initialize_parameters()
#     print("W1 = " + str(parameters["W1"]))
#     print("b1 = " + str(parameters["b1"]))
#     print("W2 = " + str(parameters["W2"]))
#     print("b2 = " + str(parameters["b2"]))
# 4. Forward propagation
def forward_propagation(X, parameters):
    """
    Implements the forward propagation for the model:
    LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SOFTMAX
    Arguments:
        X - placeholder for the input data, of shape (input size, number of examples)
        parameters - dictionary containing the W and b parameters
    Returns:
        Z3 - the output of the last LINEAR unit
    """
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']
    Z1 = tf.add(tf.matmul(W1, X), b1)   # Z1 = np.dot(W1, X) + b1
    # Z1 = tf.matmul(W1, X) + b1        # equivalent form
    A1 = tf.nn.relu(Z1)                 # A1 = relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)  # Z2 = np.dot(W2, A1) + b2
    A2 = tf.nn.relu(Z2)                 # A2 = relu(Z2)
    Z3 = tf.add(tf.matmul(W3, A2), b3)  # Z3 = np.dot(W3, A2) + b3
    return Z3
# Test
# tf.reset_default_graph()
# with tf.Session() as sess:
#     X, Y = create_placeholders(12288, 6)
#     parameters = initialize_parameters()
#     Z3 = forward_propagation(X, parameters)
#     print("Z3 = " + str(Z3))
# 5. Compute the cost
def compute_cost(Z3, Y):
    # tf.nn.softmax_cross_entropy_with_logits expects shape (number of examples, number of classes),
    # so both Z3 and Y are transposed first
    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    return cost
# Test
# tf.reset_default_graph()  # clears the default graph stack and resets the global default graph
# with tf.Session() as sess:
#     X, Y = create_placeholders(12288, 6)
#     parameters = initialize_parameters()
#     Z3 = forward_propagation(X, parameters)
#     cost = compute_cost(Z3, Y)
#     print("cost = " + str(cost))
# 6. Backward propagation and parameter update
'''
All of the backpropagation and the parameter update are handled in a single line of code.
After computing the cost, create an "optimizer" object. When running the tf.Session, this object
must be called together with the cost; when called, it optimizes the given cost using the chosen
method and learning rate.
For example, for gradient descent:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
During optimization:
_, c = sess.run([optimizer, cost], feed_dict={X: mini_batch_X, Y: mini_batch_Y})
Here _ is a throwaway variable for a value we will not use later: it holds the evaluated optimizer
op (which we do not need), while c takes the value of the cost.
'''
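# A tiny end-to-end sketch of this optimizer pattern (my own illustration, separate from the model
# below): minimize (w - 5)^2 with gradient descent; the variable name w is an arbitrary choice.
def optimizer_demo():
    w = tf.Variable(0.0, name="w")
    demo_cost = tf.square(w - 5.0)
    demo_optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(demo_cost)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(100):
            _, c = sess.run([demo_optimizer, demo_cost])
        print(sess.run(w))  # close to 5.0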
# 7. Build the model
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001, num_epochs=1500, minibatch_size=32, print_cost=True,
          is_plot=True):
    ops.reset_default_graph()  # allows rerunning the model without overwriting tf variables
    tf.set_random_seed(1)
    seed = 3
    (n_x, m) = X_train.shape
    n_y = Y_train.shape[0]
    costs = []
    # Create placeholders for X and Y
    X, Y = create_placeholders(n_x, n_y)
    # Initialize the parameters
    parameters = initialize_parameters()
    # Forward propagation -- note that this uses the placeholder X
    Z3 = forward_propagation(X, parameters)
    # Compute the cost -- note that this uses the placeholder Y
    cost = compute_cost(Z3, Y)
    # Backward propagation, using the Adam optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    # Initialize all the variables
    init = tf.global_variables_initializer()
    # Start the session and run the computation
    with tf.Session() as sess:
        # Run the initialization
        sess.run(init)
        # Training loop
        for epoch in range(num_epochs):
            epoch_cost = 0
            num_minibatches = int(m / minibatch_size)
            seed = seed + 1
            mini_batches = tf_utils.random_mini_batches(X_train, Y_train, minibatch_size, seed)
            for minibatch in mini_batches:
                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch
                # The data is ready, run the session on the optimizer and the cost
                _, minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
                # Accumulate this minibatch's share of the epoch cost
                epoch_cost = epoch_cost + minibatch_cost / num_minibatches
            # Record and print the cost
            ## Record the cost
            if epoch % 5 == 0:
                costs.append(epoch_cost)
                # Print it if requested
                if print_cost and epoch % 100 == 0:
                    print("epoch = " + str(epoch) + " epoch_cost = " + str(epoch_cost))
        # Plot the cost curve if requested
        if is_plot:
            plt.plot(np.squeeze(costs))
            plt.ylabel('cost')
            plt.xlabel('iterations (per tens)')
            plt.title("Learning rate =" + str(learning_rate))
            plt.show()
        # Save the learned parameters
        parameters = sess.run(parameters)
        print("Parameters have been saved to the session.")
        # Compute the correct predictions
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))
        # Compute the accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        # tensor.eval(feed_dict) is equivalent to sess.run(tensor, feed_dict) inside this with block
        print("Train accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
    return parameters
# Test
# Start time
start_time = datetime.datetime.now()
# Train the model
parameters = model(X_train, Y_train, X_test, Y_test)
# End time
end_time = datetime.datetime.now()
# Elapsed time
print("CPU execution time: " + str(end_time - start_time))
# To test your own images: crop them to a 1:1 aspect ratio, resize the large image to 64x64
# (e.g. with Format Factory), and convert jpg to png, because mpimg can only read png images.
# import matplotlib.image as mpimg  # mpimg is used to read images
#
# my_image1 = "5.png"                                             # image file name
# fileName1 = "images/fingers/" + my_image1                       # image path
# image1 = mpimg.imread(fileName1)                                # read the image
# plt.imshow(image1)                                              # show the image
# my_image1 = image1.reshape(1, 64 * 64 * 3).T                    # reshape the image
# my_image_prediction = tf_utils.predict(my_image1, parameters)   # run the prediction
# print("Prediction: y = " + str(np.squeeze(my_image_prediction)))
3. tf_utils code
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import math
import tensorflow as tf
import h5py
import numpy as np
import pandas as pd
def load_dataset():
    train_dataset = h5py.File('datasets/train_signs.h5', 'r')
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])
    test_dataset = h5py.File('datasets/test_signs.h5', 'r')
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])
    classes = np.array(test_dataset["list_classes"][:])
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
def convert_to_one_hot(Y, C):
    # Index the C x C identity matrix with the label values, then transpose to shape (C, m)
    Y1 = np.eye(C)
    Y2 = Y.reshape(-1)
    Y = Y1[Y2].T
    return Y
# convert_to_one_hot(np.array([0,1,2,3]),4)
def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
    m = X.shape[1]
    np.random.seed(seed)
    mini_batches = []
    # 1. Shuffle
    permutation = list(np.random.permutation(m))
    shuffle_X = X[:, permutation]
    shuffle_Y = Y[:, permutation].reshape((Y.shape[0], m))
    # 2. Partition
    num_complete_minibatches = math.floor(m / mini_batch_size)
    for k in range(0, num_complete_minibatches):
        mini_batch_X = shuffle_X[:, k * mini_batch_size: k * mini_batch_size + mini_batch_size]
        mini_batch_Y = shuffle_Y[:, k * mini_batch_size: k * mini_batch_size + mini_batch_size]
        mini_batches.append((mini_batch_X, mini_batch_Y))
    # Handling the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = shuffle_X[:, num_complete_minibatches * mini_batch_size: m]
        mini_batch_Y = shuffle_Y[:, num_complete_minibatches * mini_batch_size: m]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    return mini_batches
def predict(X, parameters):
    W1 = tf.convert_to_tensor(parameters["W1"])
    b1 = tf.convert_to_tensor(parameters["b1"])
    W2 = tf.convert_to_tensor(parameters["W2"])
    b2 = tf.convert_to_tensor(parameters["b2"])
    W3 = tf.convert_to_tensor(parameters["W3"])
    b3 = tf.convert_to_tensor(parameters["b3"])
    params = {"W1": W1,
              "b1": b1,
              "W2": W2,
              "b2": b2,
              "W3": W3,
              "b3": b3}
    x = tf.placeholder("float", [12288, 1])
    z3 = forward_propagation_for_predict(x, params)
    p = tf.argmax(z3)
    sess = tf.Session()
    prediction = sess.run(p, feed_dict={x: X})
    return prediction
def forward_propagation_for_predict(X, parameters):
    """
    Implements the forward propagation for the model: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SOFTMAX
    Arguments:
    X -- input dataset placeholder, of shape (input size, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3"
                  the shapes are given in initialize_parameters
    Returns:
    Z3 -- the output of the last LINEAR unit
    """
    # Retrieve the parameters from the dictionary "parameters"
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']
    # Numpy Equivalents:
    Z1 = tf.add(tf.matmul(W1, X), b1)   # Z1 = np.dot(W1, X) + b1
    A1 = tf.nn.relu(Z1)                 # A1 = relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)  # Z2 = np.dot(W2, A1) + b2
    A2 = tf.nn.relu(Z2)                 # A2 = relu(Z2)
    Z3 = tf.add(tf.matmul(W3, A2), b3)  # Z3 = np.dot(W3, A2) + b3
    return Z3