TensorFlow Learning Notes in Python

These are my notes from learning TensorFlow.

0. Basic TensorFlow operations

Here we define two variables, w1 and w2 (the hidden-layer weights).

w1 is a 2x3 matrix and w2 is a 3x1 matrix.

x is a placeholder that stands in for the input data: an unknown number of rows, each with 2 columns.

The data is fed into the network through feed_dict.

#coding:utf-8

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=(None, 2))
w1 = tf.Variable(tf.random_normal([2, 3], stddev=1))
w2 = tf.Variable(tf.random_normal([3, 1], stddev=1))

# Define the forward propagation
a = tf.matmul(x, w1)
y = tf.matmul(a, w2)

# Run the computation in a session
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print('y :', sess.run(y, feed_dict={x: [[0.7, 0.5],
        [0.2, 0.3], [0.3, 0.4], [0.4, 0.5]]}))

Output: a matrix with one row per input sample and a single column (here 4x1, since four samples are fed).

 

1. Generating some simple data

import tensorflow as tf
import numpy as np
BATCH_SIZE = 8

# Random number generator
rng = np.random.RandomState()

# 32 samples, each with 2 features drawn from [0, 1)
X = rng.rand(32, 2)

# Label each sample 1 if x0 + x1 < 1, otherwise 0
Y = [[int(x0 + x1 < 1)] for (x0, x1) in X]

print('X:', X)
print('Y:', Y)

2. Defining the network's inputs, parameters, and outputs, and the forward propagation

# Define the network's inputs, parameters, and outputs, and the forward propagation
x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))

w1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
w2 = tf.Variable(tf.random_normal([3, 1], stddev=1, seed=1))

a = tf.matmul(x, w1)
y = tf.matmul(a, w2)

3. Defining the loss function and the backpropagation method (three optimizers, plus a custom loss function)

# Define the loss function and the backpropagation method (three optimizer options)
loss = tf.reduce_mean(tf.square(y - y_))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
# train_step = tf.train.MomentumOptimizer(0.01, 0.9).minimize(loss)
# train_step = tf.train.AdamOptimizer(0.01).minimize(loss)

# Custom loss function: COST is a user-defined cost and PROFIT a user-defined profit; y is the prediction, y_ the true value
#loss_mse = tf.reduce_sum(tf.where(tf.greater(y, y_), (y - y_) * COST, (y_ - y) * PROFIT))

#train_step = tf.train.GradientDescentOptimizer(0.005).minimize(loss_mse)
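A minimal standalone sketch of this custom loss, assuming hypothetical constants COST = 1 and PROFIT = 9 (these values are not given above and only illustrate the asymmetric penalty):

import tensorflow as tf

COST = 1    # assumed penalty per unit of over-prediction
PROFIT = 9  # assumed penalty per unit of under-prediction

y = tf.placeholder(tf.float32, shape=(None, 1))   # predicted values
y_ = tf.placeholder(tf.float32, shape=(None, 1))  # true values

# Over-predictions are charged COST per unit, under-predictions PROFIT per unit
loss_custom = tf.reduce_sum(
    tf.where(tf.greater(y, y_), (y - y_) * COST, (y_ - y) * PROFIT))

with tf.Session() as sess:
    print(sess.run(loss_custom, feed_dict={y: [[2.0], [1.0]],
                                           y_: [[1.0], [3.0]]}))
    # (2 - 1) * COST + (3 - 1) * PROFIT = 1 + 18 = 19.0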

 

4. Creating a session and training

# Create a session and train
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    for i in range(3000):
        # Cycle through the 32 samples, BATCH_SIZE at a time
        start = (i * BATCH_SIZE) % 32
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x: X[start: end], y_: Y[start: end]})
        if i % 500 == 0 :
            total_loss = sess.run(loss, feed_dict={x: X, y_: Y})
            print('i = ', i, 'total_loss =', total_loss)

    print()
    print('w1:', sess.run(w1))
    print('w2:', sess.run(w2))

Output:

 

5. Learning rate

The learning rate learning_rate controls how large each parameter update is.

If the learning rate is too large, the parameters to be optimized oscillate around the minimum and do not converge;

if the learning rate is too small, the parameters converge slowly.

During training, the parameters are updated in the direction of gradient descent on the loss function.

import tensorflow as tf

LEARNING_RATE_BASE = 0.1 # initial learning rate
LEARNING_RATE_DECAY = 0.99 # learning rate decay rate
LEARNING_RATE_STEP = 1 # number of batches fed before the learning rate is updated; usually total_samples / BATCH_SIZE

# Counter of how many batches have been run; starts at 0 and is not trainable
global_step = tf.Variable(0, trainable=False)

# Define the exponentially decaying learning rate
learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, LEARNING_RATE_STEP, LEARNING_RATE_DECAY, staircase = True)
# Define the parameter to optimize, initialized to 5
w = tf.Variable(tf.constant(5, dtype=tf.float32))
# Define the loss function
loss = tf.square(w + 1)
# Define the backpropagation method
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step = global_step)

# Create a session and train
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for i in range(40):
        sess.run(train_step)
        learning_rate_val = sess.run(learning_rate)
        global_step_val = sess.run(global_step)
        w_val = sess.run(w)
        loss_val = sess.run(loss)
        print('After %d steps: global_step is %d, w is %f, learning_rate is %f, loss is %f' % (i, global_step_val, w_val, learning_rate_val, loss_val))
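For reference, tf.train.exponential_decay with staircase=True computes learning_rate = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step // LEARNING_RATE_STEP). A small plain-Python sketch of the same schedule:

LEARNING_RATE_BASE = 0.1
LEARNING_RATE_DECAY = 0.99
LEARNING_RATE_STEP = 1

def decayed_learning_rate(global_step):
    # staircase=True makes the exponent an integer division,
    # so the rate drops in discrete steps
    return LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step // LEARNING_RATE_STEP)

print(decayed_learning_rate(1))   # 0.099
print(decayed_learning_rate(40))  # roughly 0.0669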

6. Moving average

The code is as follows:

import tensorflow as tf

# Define a variable and the moving-average class
w1 = tf.Variable(0, dtype=tf.float32)
# Define num_updates (the number of training iterations), initialized to 0 and not trainable
global_step = tf.Variable(0, trainable=False)
# Instantiate the moving-average class with decay rate 0.99 and the current step global_step
MOVING_AVERAGE_DECAY = 0.99
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
# tf.trainable_variables() automatically collects all trainable parameters into a list
ema_op = ema.apply(tf.trainable_variables())

# Observe how the values change across iterations
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    # Use ema.average(w1) to get the moving average of w1
    print(sess.run([w1, ema.average(w1)]))

    # Assign 1 to w1
    sess.run(tf.assign(w1, 1))
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))

    # Update the step and w1, simulating w1 becoming 10 after 100 training iterations
    sess.run(tf.assign(global_step, 100))
    sess.run(tf.assign(w1, 10))
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))

    # Run ema_op 10 more times, printing the updated moving average of w1 each time
    for i in range(10):
        sess.run(ema_op)
        print(sess.run([w1, ema.average(w1)]))

Output:

 

As the output shows, each run moves the moving average of w1 closer to w1 itself (the moving average tracks changes in the parameter).
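For reference, tf.train.ExponentialMovingAverage updates a shadow value as shadow = decay * shadow + (1 - decay) * variable, where the decay actually applied is min(MOVING_AVERAGE_DECAY, (1 + num_updates) / (10 + num_updates)). A small plain-Python sketch of that update rule:

MOVING_AVERAGE_DECAY = 0.99

def ema_update(shadow, variable, num_updates):
    # Early in training the effective decay is small, so the shadow value
    # can catch up with the variable quickly
    decay = min(MOVING_AVERAGE_DECAY, (1.0 + num_updates) / (10.0 + num_updates))
    return decay * shadow + (1 - decay) * variable

# Matches the first update in the example: w1 = 1, global_step = 0
print(ema_update(0.0, 1.0, 0))    # decay = 0.1, result 0.9
# With global_step = 100 and w1 = 10 the shadow moves toward 10 more slowly
print(ema_update(0.9, 10.0, 100)) # decay is about 0.918, result about 1.644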

 

7. Regularization

In the loss formula from the original notes, cem denotes the cross-entropy loss.
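A minimal standalone sketch of how a regularized loss of the form loss = cem + REGULARIZER * (L2 penalty on the weights) is assembled through the 'losses' collection, matching how the code below builds loss_total (the weight values here are purely illustrative):

import tensorflow as tf

REGULARIZER = 0.01

w = tf.Variable(tf.constant([[1.0, 2.0]]))
# l2_regularizer(REGULARIZER)(w) evaluates to REGULARIZER * sum(w**2) / 2
tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w))

base_loss = tf.constant(1.0)  # stands in for cem (or for the MSE loss used below)
loss_total = base_loss + tf.add_n(tf.get_collection('losses'))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(loss_total))  # 1.0 + 0.01 * (1 + 4) / 2 = 1.025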

 

We randomly generate 300 normally distributed points X = [x0, x1] as the dataset, compute a label Y_ for each point, and plot the points as red and blue dots.

Rule: when x0 * x0 + x1 * x1 < 2, y_ = 1 and the point is drawn in red; otherwise it is drawn in blue.

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

BATCH_SIZE = 30

# Random number generator
rdm = np.random.RandomState()

# Generate a 300x2 matrix: 300 coordinate points (x0, x1) as the input dataset
X = rdm.randn(300, 2)

# For each row of X, the label is 1 if the sum of squares of its two coordinates is less than 2, else 0
Y_ = [int(x0 * x0 + x1 * x1 < 2) for (x0, x1) in X]

# Map each label in Y_: 1 becomes 'red', 0 becomes 'blue'
Y_c = [['red' if y else 'blue'] for y in Y_]

# Reshape the dataset X and labels Y_: the second reshape argument gives the number of columns,
# so X becomes n rows by 2 columns and Y_ becomes n rows by 1 column
X = np.vstack(X).reshape(-1, 2)
Y_ = np.vstack(Y_).reshape(-1, 1)

print('X:', X)
print('Y_"', Y_)
print('Y_c', Y_c)

# Use plt.scatter to plot each row's (x0, x1) from columns 0 and 1 of X;
# the color of each point is given by Y_c
plt.scatter(X[:, 0], X[:, 1], c = np.squeeze(Y_c))
plt.show()


# Define the network's inputs, parameters, and outputs, and the forward propagation
def get_weight(shape, regularizer):
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w

def get_bias(shape):
    b = tf.Variable(tf.constant(0.01, shape=shape))
    return b

x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))

w1 = get_weight([2, 11], 0.01)
b1 = get_bias([11])
y1 = tf.nn.relu(tf.matmul(x, w1) + b1)

w2 = get_weight([11, 1], 0.01)
b2 = get_bias([1])
y = tf.matmul(y1, w2) + b2 # no activation on the output layer

# Define the loss function
loss_mse = tf.reduce_mean(tf.square(y - y_))
loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))

# Define the backpropagation method, without regularization
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_mse)

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for i in range(40000):
        start = (i * BATCH_SIZE) % 300
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x: X[start: end], y_: Y_[start: end]})
        if i % 2000 == 0:
            loss_mse_v = sess.run(loss_mse, feed_dict={x: X, y_: Y_})
            print('After %d steps, loss is: %f' % (i, loss_mse_v))

    # xx and yy form a 2-D grid covering -3 to 3 in both directions with step 0.01
    xx, yy = np.mgrid[-3: 3: 0.01, -3: 3: 0.01]
    # Flatten xx and yy and stack them into a 2-column matrix of grid points
    grid = np.c_[xx.ravel(), yy.ravel()]
    # Feed the grid points into the network; probs is the output
    probs = sess.run(y, feed_dict={x: grid})
    # Reshape probs to match xx
    probs = probs.reshape(xx.shape)
    probs = probs.reshape(xx.shape)
    
    print('w1:', sess.run(w1))
    print('b1:', sess.run(b1))
    print('w2:', sess.run(w2))
    print('b2:', sess.run(b2))

plt.scatter(X[:,0], X[:,1], c=np.squeeze(Y_c))
plt.contour(xx, yy, probs, levels=[.5])
plt.show()



# Define the backpropagation method, with regularization
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_total)

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for i in range(40000):
        start = (i * BATCH_SIZE) % 300
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x: X[start: end], y_: Y_[start:end]})
        if i % 2000 == 0:
            loss_val = sess.run(loss_total, feed_dict={x: X, y_: Y_})
            print('After %d steps, loss is %f' % (i, loss_val))


    xx, yy = np.mgrid[-3: 3: 0.01, -3: 3: 0.01]
    grid = np.c_[xx.ravel(), yy.ravel()]
    probs = sess.run(y, feed_dict={x: grid})
    probs = probs.reshape(xx.shape)

    print('w1:', sess.run(w1))
    print('b1:', sess.run(b1))
    print('w2:', sess.run(w2))
    print('b2:', sess.run(b2))

plt.scatter(X[:,0], X[:,1], c=np.squeeze(Y_c))
plt.contour(xx, yy, probs, levels=[0.5])
plt.show()

Figure 1: the generated random points

Figure 2: the fitted boundary without regularization

Figure 3: the fitted boundary after training with regularization

Comparing the results with and without regularization shows that regularization makes the fitted curve smoother and gives the model better generalization.

 

8. A modular implementation

# A modular neural-network template
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

BATCH_SIZE = 30
LEARNING_RATE_BASE = 0.001
LEARNING_RATE_DECAY = 0.999
REGULARIZER = 0.01

# Function that generates the dataset
def generateds():
    rdm = np.random.RandomState()
    X = rdm.randn(300, 2)
    Y_ = [int(x0*x0 + x1*x1 < 2) for (x0, x1) in X]
    Y_c = [['red' if y else 'blue'] for y in Y_]
    
    X = np.vstack(X).reshape(-1, 2)
    Y_ = np.vstack(Y_).reshape(-1, 1)

    return X, Y_, Y_c

# Define the network's inputs, parameters, and outputs, and the forward propagation
def get_weight(shape, regularizer):
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w

def get_bias(shape):
    b = tf.Variable(tf.constant(0.01, shape=shape))
    return b

def forward(x, regularizer):
    w1 = get_weight([2, 11], regularizer)
    b1 = get_bias([11])
    y1 = tf.nn.relu(tf.matmul(x, w1) + b1)

    w2 = get_weight([11, 1], regularizer)
    b2 = get_bias([1])
    y = tf.matmul(y1, w2) + b2 # no activation on the output layer
    return y


# Backpropagation module
def backward():
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))

    X, Y_, Y_c = generateds() # get the dataset
    
    y = forward(x, REGULARIZER)

    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(
            LEARNING_RATE_BASE,
            global_step,
            300/BATCH_SIZE,
            LEARNING_RATE_DECAY,
            staircase=True)

    # Define the loss function
    loss_mse = tf.reduce_mean(tf.square(y - y_))
    loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))

    # Define the backpropagation method, with regularization
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss_total)
    
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        for i in range(40000):
            start = (i * BATCH_SIZE) % 300
            end = start + BATCH_SIZE
            sess.run(train_step, feed_dict={x: X[start: end], y_: Y_[start: end]})
            if i % 2000 == 0:
                loss_val = sess.run(loss_total, feed_dict={x: X, y_: Y_})
                print('After %d steps, loss is %f' % (i, loss_val))
        
        xx, yy = np.mgrid[-3: 3: 0.01, -3: 3: 0.01]
        grid = np.c_[xx.ravel(), yy.ravel()]
        probs = sess.run(y, feed_dict={x: grid})
        probs = probs.reshape(xx.shape)
    
    plt.scatter(X[:,0], X[:,1], c=np.squeeze(Y_c))
    plt.contour(xx, yy, probs, levels=[.5])
    plt.show()

if __name__=='__main__':
    backward()

Output:

 
