啃书系列《Tensorflow实战谷歌深度学习框架》

最新推荐文章于 2021-09-22 20:04:09 发布

听我的错不了

最新推荐文章于 2021-09-22 20:04:09 发布

阅读量136

点赞数

分类专栏：学习日常 tensorflow

本文链接：https://blog.csdn.net/weixin_45032769/article/details/99753877

版权

学习日常同时被 2 个专栏收录

70 篇文章 1 订阅

订阅专栏

tensorflow

7 篇文章 0 订阅

订阅专栏

1 神经网络参数

import tensorflow as tf

# Weights of a 2-3-1 fully connected network, sampled from a normal
# distribution with stddev 1 and a fixed op-level seed.
w1 = tf.Variable(tf.random_normal((2, 3), stddev=1, seed=1))
w2 = tf.Variable(tf.random_normal((3, 1), stddev=1, seed=1))

# Input batch: three samples with two features each (a 3 x 2 matrix).
x = tf.placeholder(tf.float32, shape=(3, 2), name="input")
# Ground-truth labels, one per sample; the cross-entropy below needs them.
y_ = tf.placeholder(tf.float32, shape=(3, 1), name="label")

# Forward propagation: two matrix products, no bias and no activation.
a = tf.matmul(x, w1)
y = tf.matmul(a, w2)

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print(sess.run(y, feed_dict={x: [[0.7, 0.9], [0.2, 0.3], [0.6, 0.8]]}))

# Map the raw output into (0, 1) so it can be read as a probability.
y = tf.sigmoid(y)

# Binary cross-entropy.  The second term must be weighted by (1 - y_), the
# label, not (1 - y), the prediction.  clip_by_value keeps log() away from 0.
cross_entropy = -tf.reduce_mean(
    y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0))
    + (1 - y_) * tf.log(tf.clip_by_value(1 - y, 1e-10, 1.0))
)

learning_rate = 0.001
train_step = tf.train.AdadeltaOptimizer(learning_rate).minimize(cross_entropy)

2 完整的神经网络样例



import tensorflow as tf

from numpy.random import RandomState

# Number of samples fed to the network per training step.
batch_size = 8

# Parameters of a 2-3-1 fully connected network.
w1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
w2 = tf.Variable(tf.random_normal([3, 1], stddev=1, seed=1))

# The first dimension is None so the same graph accepts both a mini-batch
# (training) and the whole data set (evaluation).
x = tf.placeholder(tf.float32, shape=(None, 2), name='x-input')
y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y-input')

# Forward propagation followed by a sigmoid.
a = tf.matmul(x, w1)
y = tf.matmul(a, w2)
y = tf.sigmoid(y)

# Binary cross-entropy.  The second term is weighted by (1 - y_), the label;
# the original snippet used (1 - y), the prediction, which is not
# cross-entropy.
cross_entropy = -tf.reduce_mean(
    y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0))
    + (1 - y_) * tf.log(tf.clip_by_value(1 - y, 1e-10, 1.0)))
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

# Generate a synthetic data set of 128 random 2-D points.
rdm = RandomState(1)
dataset_size = 128
X = rdm.rand(dataset_size, 2)

# Label rule: 1 (positive) when x1 + x2 < 1, otherwise 0 (negative).
Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    # Parameter values before training.
    print(sess.run(w1))
    print(sess.run(w2))

    # Number of training iterations.
    STEPS = 5000
    for i in range(STEPS):
        # Walk through the data set one mini-batch at a time, wrapping around.
        start = (i * batch_size) % dataset_size
        end = min(start + batch_size, dataset_size)

        sess.run(train_step,
                 feed_dict={x: X[start:end], y_: Y[start:end]})
        if i % 1000 == 0:
            # Periodically report the loss over the full data set.
            total_cross_entropy = sess.run(
                cross_entropy, feed_dict={x: X, y_: Y})
            print("After %d training step(s),cross entropy on all data is %g" %(i, total_cross_entropy))

3深层神经网络

"""
# 深度学习网络与深层神经网络
深度学习重要特性:多层和非线性
线性模型的最大特性就是任意组合线性模型的组合仍然是线性的
线性的网络一层和多层没什么区别,所以解决的问题很有限
所以有了深度学习--->非线性
激励函数-->去线性化  ReLu sigmoid tanh 函数

# 多层网络解决异或运算
感知机模型--> 对神经网络建模
用感知机的网络结构模拟异或运算(单层感知机无法模拟异或运算,加入隐藏层之后才可以)
说明隐藏层的重要性

# 经典的损失函数
Classification
Regression
交叉熵cross_entropy: 评判一个输出向量和期望向量的接近程度,刻画了两个概率分布之间的距离
他也是Classification中常用的一种损失函数
Softmax回归就是一个非常常用的方法
Softmax回归本身就可以作为一个学习算法来优化分类的结果,但是在tensorflow中softmax回归被取消了,他只是额外的一层处理层
例子: 原本一个问题的正确答案是1, 0, 0 --> softmax回归之后 答案为 0.5, 0.4, 0.1
#
y代表原始神经网络的输出结果,y_给出了标准答案
与分类问题不同,回归问题解决的是对具体数值的预测
房价预测 销量预测,etc.
回归问题一般只有一个输出节点,这个节点的输出值就是预测值
对于回归问题最常用的损失函数就是均方误差(MSE)--> 公式为 $MSE(y, y\_) = \frac{1}{n}\sum_{i=1}^{n}(y_i - y\_i)^2$,即预测值y和标准答案y_之差的平方的平均值
#自定义的loss函数
这样可以使输出结果更加接近实际情况





"""
import tensorflow as tf
import numpy as np

# Forward propagation of a two-layer network with ReLU activations.
# NOTE(review): x, w1, w2, biases1 and biases2 are not defined in this
# snippet; they are presumably supplied by the surrounding context.
a = tf.nn.relu(tf.matmul(x, w1) + biases1)
y = tf.nn.relu(tf.matmul(a, w2) + biases2)

# Cross-entropy written with TensorFlow ops.  y_ is the expected output and y
# the prediction; clip_by_value bounds y to [1e-10, 1.0] before the log.
cross_entropy = -tf.reduce_mean(
    y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0))

)
# In the cross-entropy formula y_ denotes the correct result and y the
# prediction.
# tf.clip_by_value limits every element of a tensor to a given range, which
# avoids invalid operations such as log(0).
# Tensor.eval() evaluates a tensor and returns its value; it needs a default
# session, so all demos below run inside a `with tf.Session()` block.
with tf.Session():
    # tf.clip_by_value demo: every value outside [2.5, 4.5] is replaced by the
    # nearest bound.
    v = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    print(tf.clip_by_value(v, 2.5, 4.5).eval())

    # tf.log demo: prints the element-wise natural logarithm.
    # (tf.constant must be called with parentheses; the original subscripted
    # the function, which raises TypeError.)
    v = tf.constant([1.0, 2.0, 3.0])
    print(tf.log(v).eval())

    # Multiplication demo: `*` is element-wise, tf.matmul is the matrix product.
    v1 = tf.constant([[1.0, 2.0], [3.0, 4.0]])
    v2 = tf.constant([[4.0, 5.0], [2.0, 3.0]])
    print((v1 * v2).eval())          # [[4, 10], [6, 12]]
    print(tf.matmul(v1, v2).eval())  # [[8, 11], [20, 27]]

    # tf.reduce_mean demo: mean over all elements (3.5 here).
    v = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    print(tf.reduce_mean(v).eval())

# Softmax and cross-entropy combined in one op.  y is the raw network output
# (logits) and y_ holds the correct answers.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    labels=y_, logits=y
)
# Unlike classification, regression predicts a concrete numeric value,
# e.g. house prices or sales volume.

# Custom loss function: an asymmetric penalty that multiplies the difference
# by `a` where v1 > v2 and by `b` elsewhere.
# NOTE(review): `b` is not defined anywhere in this snippet and `a` is only
# bound to the hidden-layer tensor above; both presumably stand for scalar
# cost coefficients supplied by the reader -- confirm before running.
loss = tf.reduce_sum(tf.where(tf.greater(v1, v2),
                              (v1 - v2) * a, (v2 - v1) * b))
# tf.greater takes two tensors, compares them element by element and returns
# the result of each comparison.
# tf.where takes three arguments: a condition, and two value tensors.  Where
# the condition is True the element comes from the second argument, otherwise
# from the third.
v1 = tf.constant([1.0, 2.0, 3.0, 4.0])
v2 = tf.constant([4.0, 3.0, 2.0, 1.0])

sess = tf.InteractiveSession()
print((tf.greater(v1, v2).eval()))
# Output: [False False True True]

print(tf.where(tf.greater(v1, v2), v1, v2).eval())
# Output: [4. 3. 3. 4.]  --> each element is picked at the matching position
sess.close()

4loss对训练模型的影响

import tensorflow as tf

from numpy.random import RandomState

# Mini-batch size used for every optimisation step.
batch_size = 8

# A single-layer model: two inputs, one output, no hidden layer.
w1 = tf.Variable(tf.random_normal([2, 1], stddev=1, seed=1))

x = tf.placeholder(tf.float32, shape=(None, 2), name='x-input')
y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y-input')

# Forward propagation is a plain weighted sum of the inputs.
y = tf.matmul(x, w1)

# Asymmetric cost: predicting too little is ten times as expensive as
# predicting too much.
loss_less = 10
loss_more = 1
over_predicted = tf.greater(y, y_)
cost_when_over = (y - y_) * loss_more
cost_when_under = (y_ - y) * loss_less
loss = tf.reduce_sum(tf.where(over_predicted, cost_when_over, cost_when_under))
train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

# Synthetic data set of 128 random 2-D points.
rdm = RandomState(1)
dataset_size = 128
X = rdm.rand(dataset_size, 2)
# The regression target is the sum of both inputs plus a small random noise
# term drawn uniformly from [-0.05, 0.05).
Y = [[x1 + x2 + rdm.rand() / 10.0 - 0.05] for (x1, x2) in X]

# Train the model.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Number of training iterations.
    STEPS = 5000
    for step in range(STEPS):
        # Select the current mini-batch, wrapping around the data set.
        start = (step * batch_size) % dataset_size
        end = min(start + batch_size, dataset_size)
        batch = {x: X[start:end], y_: Y[start:end]}

        sess.run(train_step, feed_dict=batch)
        # Show the current weights after every step.
        print(sess.run(w1))

5 神经网络

"""
Backpropagation:反向传播算法  --> 核心算法,给出一个高效的方式在所有参数上使用梯度下降算法,使神经网络模型在训练数据上的损失函数尽可能小
Gradient descent:梯度下降算法 --> 用于优化单个参数的取值

梯度下降算法消耗时间太长--> 随机梯度下降算法(stochastic gradient descent)
但是sgd算法只是优化了某一部分而不是全局,因此--> batch--每次计算一小部分训练数据的loss函数而不是全部,相当于采样的原理






"""
import tensorflow as tf
# Most neural-network training programs follow the skeleton below.  The `...`
# markers and the names `n` and `STEPS` are placeholders to be filled in.
batch_size = n

# Each step reads a small part of the data as the current training batch for
# backpropagation.
x = tf.placeholder(tf.float32, shape=(batch_size, 2), name="x-input")
# The label placeholder gets its own name; the original reused "x-input".
y_ = tf.placeholder(tf.float32, shape=(batch_size, 1), name="y-input")

# Define the network structure and the optimisation algorithm.
loss = ...
train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

# Train the network.  tf.Session must be *called* to obtain a session object
# that can be used as a context manager.
with tf.Session() as sess:
    # Parameter initialisation.
    ...
    # Iteratively update the parameters.
    for i in range(STEPS):
        # Prepare batch_size training samples.  Shuffling the whole training
        # set before selecting usually gives a better optimisation result.
        current_X, current_Y = ...
        sess.run(train_step, feed_dict={x: current_X, y_: current_Y})

6 learning_rate

"""
learning rate:学习效率,不能过大过小
Tensorflow提供一种灵活的学习效率设置方法--指数衰减法
tf.train.exponential_decay ,通过这个函数可以较快的得到一个比较优的解



"""
import tensorflow as tf
# decayed_learning_rate = \
#     learning_rate * decay_rate ^ (global_step / decay_steps)
#
# decayed_learning_rate: the learning rate used in the current step
# learning_rate:         the initial learning rate chosen beforehand
# decay_rate:            the decay coefficient
# decay_steps:           the decay speed (steps per decay period)
# staircase:             selects the stepwise variant of the decay

# Step counter; kept out of the trainable variables because it is only
# incremented by the optimizer, never learned.
global_step = tf.Variable(0, trainable=False)

# Build the learning rate with exponential_decay: start at 0.1 and multiply
# by 0.96 every 100 steps.
learning_rate = tf.train.exponential_decay(
    0.1, global_step, 100, 0.96, staircase=True)

# Placeholder for the model's loss tensor; replace `...` with a real loss.
loss = ...

# Passing global_step to minimize() makes the optimizer increment it on every
# step, so the decayed learning rate is updated as well.
learning_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    loss, global_step=global_step)

7过拟合

"""
过拟合问题解决办法--> 正则化(regularization)
就是在loss函数中加入刻画模型复杂度的指标..
常用刻画模型复杂度的函数R(w)有两种: L1 正则化, L2正则化
无论哪种,基本思路都是通过限制权重的大小,使得模型不能任意拟合训练数据中的随机噪声
L1 --> 使的模型变得更稀疏
L2  --> 更加简洁
别的区别具体我也不知道


"""
import tensorflow as tf
import tensorflow.contrib as ct
# Definition of a loss function with L2 regularisation.
x = tf.placeholder(tf.float32, shape=(None, 2), name="x-input")
y_ = tf.placeholder(tf.float32, shape=(None, 1), name="y-input")
w = tf.Variable(tf.random_normal([2, 1], stddev=1, seed=1))
y = tf.matmul(x, w)

# Weight of the regularisation term.  `lambda` is a reserved word in Python
# and cannot be used as a name; the value here is only illustrative.
regularization_rate = 0.001
# Mean squared error plus the L2 penalty on the weights.
loss = (tf.reduce_mean(tf.square(y_ - y))
        + tf.contrib.layers.l2_regularizer(regularization_rate)(w))


# Worked example of both regularizers on a fixed 2 x 2 matrix.
weights = tf.constant([[1.0, -2.0], [-3.0, 4.0]])
with tf.Session() as sess:
    # L1: (|1| + |-2| + |-3| + |4|) * 0.5 = 5, where 0.5 is the weight of
    # the regularisation term.
    print(sess.run(tf.contrib.layers.l1_regularizer(.5)(weights)))
    # L2: (1 + 4 + 9 + 16) / 2 * 0.5 = 7.5, i.e. half the sum of squares
    # times the regularisation weight.
    print(sess.run(tf.contrib.layers.l2_regularizer(.5)(weights)))

8 一个5层神经网络带L2正则化的loss函数计算

import tensorflow as tf
import tensorflow.contrib as ct
def get_weights(shape, lambda1):
    """Create a weight variable and register its L2 penalty.

    Args:
        shape: shape of the weight matrix to create.
        lambda1: weight of the L2 regularisation term.  (`lambda` itself is a
            reserved word in Python and cannot be a parameter name.)

    Returns:
        The newly created float32 `tf.Variable`.

    Side effects:
        Adds the L2 regularisation loss of the variable to the graph
        collection named 'losses'.
    """
    # Randomly initialised weights of the requested shape.
    var = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    # l2_regularizer(lambda1) returns a function; it has to be applied to the
    # variable to obtain the loss tensor.  add_to_collection takes the
    # collection name first and the value to store second.
    tf.add_to_collection(
        'losses', ct.layers.l2_regularizer(lambda1)(var))
    return var
# Inputs and expected outputs; the batch dimension is left open.
x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))
batch_size = 8

# Node count of every layer, input layer first.
layer_dimension = [2, 10, 10, 10, 1]
# Total number of layers.
n_layers = len(layer_dimension)

# Deepest layer reached so far during forward propagation; starts at the
# input layer.
cur_layer = x

# Node count of the layer currently held in cur_layer.
in_dimension = layer_dimension[0]

# Stack the fully connected layers one after another.
for out_dimension in layer_dimension[1:]:
    # Weights of this layer; get_weights also puts their L2 regularisation
    # loss into the 'losses' collection of the graph.
    weight = get_weights([in_dimension, out_dimension], 0.001)
    bias = tf.Variable(tf.constant(0.1, shape=[out_dimension]))
    # Affine transform followed by a ReLU activation.
    pre_activation = tf.matmul(cur_layer, weight) + bias
    cur_layer = tf.nn.relu(pre_activation)
    # The output width of this layer is the input width of the next one.
    in_dimension = out_dimension

# All L2 losses were collected while the network was being built, so only the
# loss measuring the fit on the training data remains to be defined.
mse_loss = tf.reduce_mean(tf.square(y_ - cur_layer))

# Put the mean squared error into the same collection.
tf.add_to_collection('losses', mse_loss)

# get_collection returns every element of the collection as a list.  Here the
# elements are the individual parts of the loss; their sum is the final loss.
loss = tf.add_n(tf.get_collection('losses'))

9 滑动平均模型

""""
在采用随机梯度下降算法训练神经网络时,使用滑动平均模型在很多应用中都可以
在一定程度提高最终模型在测试数据上的表现

Tensorflow中提供了一个tf.train.ExponentialMovingAverage 来实现滑动平均模型.
需要: 衰减率decay -- > 控制模型更新的速度
     影子变量shadow variable -->每次运行变量更新时,影子变量的值会更新为:
     shadow_variable = decay x shadow_variable + (1 - decay) x variable
shadow_variable 影子变量
variable待更新的变量
decay衰减率 ---> 实际中接近于1
num_updates参数:动态设置decay的大小
               初始化参数


"""
import tensorflow as tf

# Variable whose moving average is tracked; initial value 0.  The dtype is
# given explicitly because every variable that takes part in a moving average
# has to be a real-valued type.
v1 = tf.Variable(0, dtype=tf.float32)
# `step` imitates the iteration counter of a training run and is used to
# control the decay rate dynamically.
step = tf.Variable(0, trainable=False)

# Moving-average helper with a decay of 0.99 and `step` as the variable that
# controls the decay rate.
ema = tf.train.ExponentialMovingAverage(0.99, step)

# Operation that updates the moving averages.  Every variable in the list is
# updated each time the operation is run.
maintain_averages_op = ema.apply([v1])
with tf.Session() as sess:
    # Initialise all variables; after this the moving average of v1 is 0.
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    # Set v1 to 5 and print the value returned by the assignment.
    print(sess.run(tf.assign(v1, 5)))
    # Update the moving average of v1.  The decay is
    # min{0.99, (1 + step) / (10 + step) = 0.1} = 0.1, so the average becomes
    # 0.1 * 0 + 0.9 * 5 = 4.5.  ema.average(v1) reads the averaged value.
    sess.run(maintain_averages_op)
    print(sess.run([v1, ema.average(v1)]))

    # Set step to 1000.
    sess.run(tf.assign(step, 1000))
    # Set v1 to 10.
    sess.run(tf.assign(v1, 10))

    # The decay is now min{0.99, 1001 / 1010 ~= 0.991} = 0.99, so the average
    # becomes 0.99 * 4.5 + 0.01 * 10 = 4.555.
    sess.run(maintain_averages_op)
    print(sess.run([v1, ema.average(v1)]))

    # One more update: 0.99 * 4.555 + 0.01 * 10 = 4.60945.
    sess.run(maintain_averages_op)
    print(sess.run([v1, ema.average(v1)]))

10 卷积神经网络

"""
卷积神经网络常用结构:
cov:高分辨率图片 --> 低分辨率图片
pooling:


"""
import tensorflow as tf
# A convolution layer implemented with TensorFlow.

# Create the filter weights and biases with tf.get_variable.  The weight shape
# is [filter height, filter width, depth of current layer, depth of next
# layer].  tf.truncated_normal_initializer draws the initial values from a
# truncated normal distribution.
filter_weight = tf.get_variable(
    'weight', [5, 5, 3, 16],
    initializer=tf.truncated_normal_initializer(stddev=0.1))

# As with the weights, the bias is shared across all positions of the layer,
# so there are as many biases as the next layer is deep.
biases = tf.get_variable('biases', [16], initializer=tf.constant_initializer(0.1))

# Node matrix of the current layer.  The original snippet passed the Python
# builtin `input` here; an explicit placeholder is used instead.  Only the
# channel count (3) is fixed by the filter shape above; the other dimensions
# are left open and are illustrative.
input_tensor = tf.placeholder(tf.float32, [None, None, None, 3], name='input')

# tf.nn.conv2d implements the forward pass of a convolution layer.
#   1st argument: node matrix of the current layer, four-dimensional (the
#                 first dimension indexes the batch, the other three describe
#                 one node matrix).
#   2nd argument: the weights of the convolution layer.
#   strides:      step sizes per dimension, an array of length 4 whose first
#                 and last entries must be 1 because the stride only applies
#                 to the height and width of the matrix.
#   padding:      'SAME' pads with zeros, 'VALID' adds no padding.
conv = tf.nn.conv2d(input_tensor, filter_weight, strides=[1, 1, 1, 1], padding='SAME')

# tf.nn.bias_add adds the bias to every node.
bias = tf.nn.bias_add(conv, biases)

# Apply ReLU to remove linearity.
actived_cnv = tf.nn.relu(bias)

# Forward pass of a max-pooling layer.

# tf.nn.max_pool takes arguments similar to tf.nn.conv2d: ksize is the size
# of the pooling window, strides the step sizes and padding selects whether
# zero padding is used.  The original call was cut off after ksize; the
# stride and padding values below are illustrative.
pool = tf.nn.max_pool(actived_cnv, ksize=[1, 3, 3, 1],
                      strides=[1, 2, 2, 1], padding='SAME')

11 LeNet5

# -*- coding: utf-8 -*-
import tensorflow as tf

# Configuration of the network.
# NOTE(review): "NONE" in the next two names is presumably a typo for "NODE"
# (input / output node counts) -- confirm before renaming.
INPUT_NONE = 784
OUTPUT_NONE = 10

IMAGE_SIZE = 28
NUM_CHANNELS = 1
NUM_LABELS = 10

# Filter size and depth of the first convolution layer.
CONV1_DEEP = 32
CONV1_SIZE = 5
# Filter size and depth of the second convolution layer.
CONV2_DEEP = 64
CONV2_SIZE = 5
# Number of nodes in the fully connected layer.
FC_SIZE = 512

# Forward propagation of the convolutional network.  The `train` parameter is
# meant to switch dropout on, which further improves reliability and guards
# against over-fitting; dropout is only used during training.
def inference(input_tensor, train, regularizer):
    """Build the first convolution layer of the network on `input_tensor`.

    NOTE(review): the snippet ends after the first convolution layer.  In the
    visible code nothing is returned and `train` and `regularizer` are not
    used; the rest of the function is presumably missing.
    """

    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable(
            "weight", [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable(
            "bias", [CONV1_DEEP], initializer=tf.constant_initializer(0.0)
        )

        # Filter with edge length 5 and depth 32, stride 1, zero padding.
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

12 循环神经网络

"""
循环神经网络: Recurrent Neural Network ,RNN
重要结构:长短时记忆网络 long short_term memory ,LSTM

循环神经网络:传统机器学习算法非常依赖于人工提取特征,使得图像识别等问题存在特征提取的瓶颈
循环神经网络主要用途是处理和预测序列数据.最擅长解决与时间序列相关的问题.循环神经网络也是处理这类问题的最自然的神经网络结构
对于一个序列数据,可以将这个序列上不同时刻的数据依次传入循环网络的输入层,而输出可以是对序列下一个时刻的预测,也可以是对当前时刻信息的处理结果
循环神经网络要求每个时刻都有一个输入,但是不一定每个时刻都需要有输出
循环神经网络对长度为N的序列展开之后,可以视为一个有N个中间层的前馈神经网络,这个前馈神经网络没有循环链接,因此可以直接使用反向传播算法进行训练,而不需要特别的优化算法
,这样的训练方法为"沿时间反向传播Back_Propagation Through Time",是循环神经网络最常见的方法

# 长短时记忆网络LSTM 结构
循环神经网络可以更好地利用传统网络结构所不能建模的信息,但同时,这带来了更大的挑战----长期依赖问题 long-term dependencies
LSTM就为了解决这个问题,在很多任务上,采用LSTM结构的循环神经网络比标准的循环神经网络表现更好.
LSTM结构是一种特殊的循环体结构,是一个有三门结构的特殊网络

 输入门:输入为上一时刻节点的输出和本时刻的输入,其输入作用
 输入节点:输入为上一时刻节点的输出和本时刻的输入,通过sig函数起到控制输入信息的作用
 遗忘门:输入为上一时刻节点的输出和本时刻的输入,起控制上一时刻内部状态信息量的作用
 输出门:输入为上一时刻节点的输出和本时刻节点的输入,通过sig函数起控制输出信息量的作用
 内部状态:包含继承上一时刻状态和产生新状态,输入为被遗忘门过滤后的上一时刻节点状态和被输入门过滤后的本次输入
# 循环神经网络的dropout
在卷积神经网络上使用dropout方法,通过dropout,可以让卷积神经网络更加健壮,在循环神经网络上也有类似的功能


"""

import tensorflow as tf
# 简单的实现前向传播算法
import numpy as np
# Input sequence (one scalar per time step) and the initial hidden state.
X = [1, 2]
state = [0.0, 0.0]

# Parameters of the recurrent cell: state-to-state weights, input-to-state
# weights and the cell bias.
w_cell_state = np.asarray([[0.1, 0.2], [0.3, 0.4]])
w_cell_input = np.asarray([0.5, 0.6])
b_cell = np.asarray([0.1, -0.1])

# Parameters of the fully connected output layer.
w_output = np.asarray([[1.0], [2.0]])
b_output = 0.1

# Run the forward pass in time order, iterating over the inputs directly.
for x_t in X:
    # Combine the previous state with the current input, then apply tanh to
    # obtain the new state.
    before_activation = np.dot(state, w_cell_state) + x_t * w_cell_input + b_cell
    state = np.tanh(before_activation)

    # Compute the output of this time step from the current state.
    final_output = np.dot(state, w_output) + b_output

    # Report the values of this time step.
    print("before activation: ", before_activation)
    print("state: ", state)
    print("output: ", final_output)


# Forward propagation of a recurrent network built from an LSTM cell.
# NOTE(review): lstm_hidden_size, batch_size, num_steps, current_input,
# expected_output, fully_connected and calc_loss are not defined in this
# snippet; they are presumably supplied by the reader.

# Define the LSTM cell.
lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_hidden_size)

# zero_state produces an all-zero initial state for one batch.  Per the
# original note the state is an LSTMStateTuple holding two tensors, state.c
# and state.h.  As with other networks one batch of samples is used per step.
state = lstm.zero_state(batch_size, tf.float32)

# Accumulated loss over all time steps.
loss = 0.0

# At test time a recurrent network can handle sequences of any length, but
# for training it is unrolled into a feed-forward network, so the sequence
# length has to be known; num_steps is that length.
for step in range(num_steps):
    # The LSTM variables are declared at the first time step; every later
    # step has to reuse them.
    if step > 0:
        tf.get_variable_scope().reuse_variables()

    # Feed the current input (x_t) and the previous state (h_{t-1}, c_{t-1})
    # to the cell.  lstm_output goes on to other layers, state goes on to the
    # next time step; the two can be treated differently, e.g. for dropout.
    lstm_output, state = lstm(current_input, state)

    # Pass the LSTM output through a fully connected layer to get the final
    # output of this time step.
    final_output = fully_connected(lstm_output)

    # Add the loss of this time step to the running total.
    loss = loss + calc_loss(final_output, expected_output)



# Use a basic LSTM cell as the building block of the recurrent body; deep
# recurrent networks can use other cell types as well.
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell

# Stack number_of_layers independent LSTM cells into one multi-layer cell.
stacked_lstm = tf.nn.rnn_cell.MultiRNNCell([lstm_cell(lstm_size) for _ in range(number_of_layers)])

state = stacked_lstm.zero_state(batch_size, tf.float32)

# num_steps is an integer step count, so it is passed to range() directly;
# calling len() on it raises TypeError.  The per-step loss is added to the
# existing `loss` accumulator.
for i in range(num_steps):
    # Variables are created at the first step and reused afterwards.
    if i > 0:
        tf.get_variable_scope().reuse_variables()
    stacked_lstm_output, state = stacked_lstm(current_input, state)
    final_output = fully_connected(stacked_lstm_output)
    loss += calc_loss(final_output, expected_output)

# Dropout for a recurrent network: every LSTM cell is wrapped in a
# DropoutWrapper before the cells are stacked.
# NOTE(review): no keep-probability arguments are passed to DropoutWrapper
# here; confirm against its documentation whether the defaults drop anything.
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell
stacked_lstm = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.DropoutWrapper(lstm_cell(lstm_size))
                                            for _ in range(number_of_layers)])

13 循环神经网络

"""
利用rnn实现对函数sin x 的预测
用Tensorflow实现预测正弦函数sin x
#过程:
将连续的函数离散化,所谓离散化就是在一个给定的区间[0,MAX]内, 通过有限个采样点模拟一个连续的曲线
比如以下程序中每隔SAMPLES_GAP对 sin函数进行一次采样,采样得到的序列就是sin函数离散化之后的结果.以下程序为预测离散化之后的sin函数




"""
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf

# 加载matplotlib工具包,使用该工具可以对预测的sin函数曲线进行绘图
import matplotlib as mpl

mpl.use('Agg')
from matplotlib import pyplot as plt

# Number of hidden nodes in each LSTM cell.
HIDDEN_SIZE = 30

# Number of stacked LSTM layers.
NUM_LAYERS = 2

# Length of one training sequence, number of training steps, batch size.
TIMESTEPS = 10
TRAINING_STEPS = 10000
BATCH_SIZE = 32

# Number of training samples, number of test samples, sampling interval.
TRAINING_EXAMPLES = 10000
TESTING_EXAMPLES = 1000
SAMPLES_GAP = 0.01


def generate_data(seq, timesteps=None):
    """Cut a sequence into sliding-window inputs and next-value targets.

    Item i together with the following ``timesteps - 1`` items forms one
    input; item ``i + timesteps`` is the matching target.  In other words the
    previous ``timesteps`` points are used to predict the next point.

    Args:
        seq: the sampled sequence (any sliceable sequence of numbers).
        timesteps: window length.  Defaults to the module-level TIMESTEPS.

    Returns:
        A pair ``(X, y)`` of float32 arrays.  Each window is wrapped in an
        extra list, so for a 1-D ``seq`` longer than ``timesteps`` X has shape
        ``(len(seq) - timesteps, 1, timesteps)`` and y has shape
        ``(len(seq) - timesteps, 1)``.

    Raises:
        ValueError: if ``timesteps`` is smaller than 1.
    """
    if timesteps is None:
        timesteps = TIMESTEPS
    if timesteps < 1:
        raise ValueError("timesteps must be at least 1")

    X = []
    y = []
    for start in range(len(seq) - timesteps):
        stop = start + timesteps
        # The extra list adds the middle dimension of size 1.
        X.append([seq[start:stop]])
        y.append([seq[stop]])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)


def lstm_model(X, y, is_training):
    """Build the multi-layer LSTM regression graph.

    Args:
        X: input tensor fed to tf.nn.dynamic_rnn.
        y: label tensor; only used when `is_training` is true.
        is_training: when false, only the predictions are built.

    Returns:
        A tuple ``(predictions, loss, train_op)``; ``loss`` and ``train_op``
        are None when `is_training` is false.
    """
    # Use a multi-layer LSTM structure.
    cell = tf.nn.rnn_cell.MultiRNNCell([
        tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE)
        for _ in range(NUM_LAYERS)])

    # Connect the stacked LSTM cells into an RNN and compute the forward pass.
    outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

    # `outputs` holds the top-layer LSTM output of every step, with dimensions
    # [batch_size, time, HIDDEN_SIZE] per the original note; only the output
    # of the last time step matters here.
    output = outputs[:, -1, :]

    # Add one fully connected layer (no activation) on top of the LSTM output
    # to produce a single predicted value.
    predictions = tf.contrib.layers.fully_connected(output, 1, activation_fn=None)

    # The loss and the optimisation step are only built for training; at test
    # time the predictions are returned directly.
    if not is_training:
        return predictions, None, None

    # Mean squared error between labels and predictions.
    loss = tf.losses.mean_squared_error(labels=y, predictions=predictions)

    # Create the optimizer and obtain the training operation.
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.train.get_global_step(),
        optimizer="Adagrad", learning_rate=0.1
    )
    return predictions, loss, train_op


def train(sess, train_X, train_y):
    """Train the LSTM model in `sess` on the given inputs and targets.

    Builds an input pipeline over ``(train_X, train_y)``, creates the model
    under the "model" variable scope, initialises all variables and runs
    TRAINING_STEPS optimisation steps, printing the loss every 100 steps.
    """
    # Feed the training data to the graph through a Dataset: repeat
    # indefinitely, shuffle with a buffer of 1000 and group into batches.
    dataset = tf.data.Dataset.from_tensor_slices((train_X, train_y))
    dataset = dataset.repeat()
    dataset = dataset.shuffle(1000)
    dataset = dataset.batch(BATCH_SIZE)
    iterator = dataset.make_one_shot_iterator()
    X, y = iterator.get_next()

    # Build the model to obtain predictions, loss and the training operation.
    with tf.variable_scope("model"):
        predictions, loss, train_op = lstm_model(X, y, True)

    # Initialise the variables, then optimise.
    sess.run(tf.global_variables_initializer())
    for step in range(TRAINING_STEPS):
        _, loss_value = sess.run([train_op, loss])
        if step % 100 != 0:
            continue
        print("train step: " + str(step) + ", loss: " + str(loss_value))


def run_eval(sess, test_X, test_y):
    """Evaluate the trained model on the test data and plot the result.

    Reuses the variables of the "model" scope, predicts TESTING_EXAMPLES
    samples one at a time, prints the root mean square error and plots the
    predicted curve against the real one.
    """
    # Feed the test data to the graph through a Dataset, one sample per batch.
    ds = tf.data.Dataset.from_tensor_slices((test_X, test_y))
    ds = ds.batch(1)
    X, y = ds.make_one_shot_iterator().get_next()

    # Build the model for inference.  The real y values are not needed, so a
    # dummy label is passed.
    with tf.variable_scope("model", reuse=True):
        prediction, _, _ = lstm_model(X, [0.0], False)

    # Collect the predictions and the matching labels.
    predictions = []
    labels = []
    for _ in range(TESTING_EXAMPLES):
        p, l = sess.run([prediction, y])
        predictions.append(p)
        labels.append(l)

    # RMSE is the evaluation metric.  The value computed here is the *root*
    # of the mean squared error, so the message says so.
    predictions = np.array(predictions).squeeze()
    labels = np.array(labels).squeeze()
    rmse = np.sqrt(((predictions - labels) ** 2).mean(axis=0))
    print("Root Mean Square Error is: %f" % rmse)

    # Plot the predicted sine curve against the real one.  The legend keyword
    # of plot() is `label`; `labels` is not a valid line property.
    plt.figure()
    plt.plot(predictions, label='predictions')
    plt.plot(labels, label='real_sin')
    plt.legend()
    plt.show()


# Generate the training and test data sets from the sine function.
# numpy.linspace creates an array of evenly spaced values.  Its three common
# arguments are the start value, the end value and the number of items, e.g.
# linspace(1, 10, 10) yields array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).

# The test range starts where the training range ends.
test_start = (TRAINING_EXAMPLES + TIMESTEPS) * SAMPLES_GAP
test_end = test_start + (TESTING_EXAMPLES + TIMESTEPS) * SAMPLES_GAP
train_X, train_y = generate_data(np.sin(np.linspace(
    0, test_start, TRAINING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
test_X, test_y = generate_data(np.sin(np.linspace(
    test_start, test_end, TESTING_EXAMPLES + TIMESTEPS, dtype=np.float32)))

with tf.Session() as sess:
    # Train the model on the training inputs and their own targets (the
    # original passed test_y, whose length does not match train_X).
    train(sess, train_X, train_y)
    # Use the trained model to predict the test data.
    run_eval(sess, test_X, test_y)