TensorFlow Step 5: Back to the Starting Point, Digging Deep, and Dissecting a Sparrow (a small but complete network).

1. Design the simplest possible digit-recognition problem.

2. Design the simplest possible neural network: a single hidden layer, 4×3×2; activation functions: sigmoid + softmax; loss function: (mean) cross-entropy.

3. Solve it by programming gradient descent and the BP (backpropagation) algorithm by hand, and observe the entire training process.

For the BP algorithm, see: http://neuralnetworksanddeeplearning.com/chap3.html
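
For reference, the four backpropagation equations (BP1-BP4) from that book, in the notation the code comments below follow (⊙ is elementwise multiplication):

$$
\begin{aligned}
&\text{(BP1)}\quad \delta^L = \nabla_a C \odot \sigma'(z^L)\\
&\text{(BP2)}\quad \delta^l = \left((w^{l+1})^T \delta^{l+1}\right) \odot \sigma'(z^l)\\
&\text{(BP3)}\quad \partial C/\partial b^l_j = \delta^l_j\\
&\text{(BP4)}\quad \partial C/\partial w^l_{jk} = a^{l-1}_k \,\delta^l_j
\end{aligned}
$$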

If softmax + cross-entropy is used, BP1 simplifies to:

$$\delta^L = a^L - y$$

For the derivation, see: https://blog.csdn.net/haolexiao/article/details/72757796
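
In brief: for a one-hot target $y$ (so $\sum_j y_j = 1$), with softmax output $a_i = e^{z_i}/\sum_k e^{z_k}$ and cross-entropy $C = -\sum_j y_j \ln a_j$, the identity $\partial \ln a_j/\partial z_i = [i=j] - a_i$ gives

$$
\frac{\partial C}{\partial z_i} = -\sum_j y_j\left([i=j] - a_i\right) = a_i\sum_j y_j - y_i = a_i - y_i,
$$

which is exactly the delta = y_output - y_desired used in the BP code below.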

The training data is checked as follows. Each 4-vector is displayed as a 2×2 binary image: images with two lit pixels are labeled [0,1], and images with a single lit pixel are labeled [1,0].

# coding=utf-8
import os  
os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # show only warnings and errors
 
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
 
logs_path=r'c:/temp/log_mnist_softmax'
learning_rate=5
training_epochs=100
 
trainData_in=np.array([[1.0,1.0,0.0,0.0],
              [1.0,0.0,1.0,0.0],
              [1.0,0.0,0.0,1.0],
              [1.0,0.0,0.0,0.0],
              [0.0,1.0,0.0,0.0],
              [0.0,0.0,0.0,1.0]])
trainData_out=np.array([[0.0,1.0],
               [0.0,1.0],
               [0.0,1.0],
               [1.0,0.0],
               [1.0,0.0],
               [1.0,0.0]])

testData_in=np.array([[0.0,0.0,0.0,1.0]])
testData_out=np.array([[1.0,0.0]])
 
print(np.shape(trainData_in))
print(np.shape(trainData_out))

for i in range(len(trainData_in)):
    print(trainData_out[i])
    I=trainData_in[i]
    J=trainData_out[i]
    print(list(J).index(max(J))) # J is an ndarray; convert to list to use index()
    I.resize(2,2)                # view each 4-element sample as a 2x2 image
    plt.subplot(6,6,i*6+1)
    plt.imshow(I,cmap='Greys_r')
plt.show()

The forward pass is built as a TensorFlow computation graph; the code is as follows:

x_input=tf.placeholder(tf.float32, [None,4], name='x_input')
y_desired=tf.placeholder(tf.float32,[None,2],name='y_desired')
#w1=tf.Variable(tf.zeros([4,3]),name='w1')
w1=tf.Variable(tf.truncated_normal([4,3],stddev=0.1),name='w1')
b1=tf.Variable(tf.zeros([3]),name='b1')
z1=tf.matmul(x_input,w1)+b1
y1=tf.nn.sigmoid(z1)

#w=tf.Variable(tf.zeros([3,2]),name='w')
w=tf.Variable(tf.truncated_normal([3,2],stddev=0.1),name='w')
b=tf.Variable(tf.zeros([2]),name='b')
z=tf.matmul(y1,w)+b
y_output=tf.nn.softmax(z,name='y_output')
lossFun_crossEntropy=-tf.reduce_mean(y_desired*tf.log(y_output)) # mean cross-entropy, averaged over all m*n entries

feed_dict_trainData={x_input:trainData_in,y_desired:trainData_out}
feed_dict_testData={x_input:testData_in,y_desired:testData_out}

###
#train_step=tf.train.GradientDescentOptimizer(learning_rate).minimize(lossFun_crossEntropy)
###
tf.summary.scalar('cost',lossFun_crossEntropy)
summary_op=tf.summary.merge_all()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    logs_writer=tf.summary.FileWriter(logs_path,graph=tf.get_default_graph())
    for epoch in range(training_epochs):
#        _,summary=sess.run([train_step,summary_op],feed_dict=feed_dict_trainData)

The error backpropagation (BP) computation, done by hand on the fetched values, is as follows:

#####
        # softmax + cross-entropy BP
        w1_temp,b1_temp,z1_temp,y1_temp,w_temp,b_temp,z_temp,y_output_temp,cost_temp=\
        sess.run([w1,b1,z1,y1,w,b,z,y_output,lossFun_crossEntropy],\
                 feed_dict=feed_dict_trainData)
        delta=y_output_temp-trainData_out  #BP1 (softmax + cross-entropy)
        print('delta=',delta)
        nabla_b=delta.sum(axis=0) # sum delta over the batch (axis=0) #BP3
        print('nabla_b=',nabla_b)
        print('y1=',y1_temp)
        nabla_w=np.dot(y1_temp.transpose(),delta) #BP4
        print('nabla_w=',nabla_w)
        
        print('z1=',z1_temp)
        dSigmoid_z1=sess.run(tf.nn.sigmoid(z1_temp)*(1-tf.nn.sigmoid(z1_temp))) # sigmoid'(z1)
        print('dSigmoid_z1=',dSigmoid_z1)
        delta=np.dot(delta,w_temp.transpose())*dSigmoid_z1 #BP2!!!
        print('w=',w_temp)
        print('delta=',delta)
        nabla_b1=delta.sum(axis=0) # sum delta over the batch (axis=0) #BP3
        print('nabla_b1=',nabla_b1)
        nabla_w1=np.dot(trainData_in.transpose(),delta)  #BP4
        print('x_input=',trainData_in)
        print('nabla_w1=',nabla_w1)
        
        # the loss is a mean over all m*n entries, hence the extra /m/n on the BP sums;
        # note: building tf.assign ops inside the loop grows the graph every epoch
        m,n=np.shape(trainData_out)
        update_w1=tf.assign(w1,w1-learning_rate/m/n*nabla_w1)
        update_b1=tf.assign(b1,b1-learning_rate/m/n*nabla_b1)
        update_w=tf.assign(w,w-learning_rate/m/n*nabla_w)
        update_b=tf.assign(b,b-learning_rate/m/n*nabla_b)
        
        print('w1\'=',sess.run(update_w1))
        print('b1\'=',sess.run(update_b1))
        print('w\'=',sess.run(update_w))
        print('b\'=',sess.run(update_b))
        
        #####
        summary=sess.run(summary_op,feed_dict=feed_dict_trainData)
        logs_writer.add_summary(summary,epoch)
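
As a sanity check on BP1-BP4, the hand-computed gradients can be compared with TensorFlow's automatic differentiation. Here is a minimal sketch (run inside the same session, before the update_* assignments are applied, so the nabla_* arrays and the variables still refer to the same weights):

# Sketch: verify the manual BP gradients against tf.gradients.
# Must run before the weight updates, so nabla_* matches the current weights.
grads=tf.gradients(lossFun_crossEntropy,[w1,b1,w,b])
g_w1,g_b1,g_w,g_b=sess.run(grads,feed_dict=feed_dict_trainData)
m,n=np.shape(trainData_out)                      # the loss is a mean over m*n entries,
print(np.allclose(g_w1,nabla_w1/m/n,atol=1e-5))  # so the BP sums are rescaled by 1/(m*n)
print(np.allclose(g_b1,nabla_b1/m/n,atol=1e-5))
print(np.allclose(g_w,nabla_w/m/n,atol=1e-5))
print(np.allclose(g_b,nabla_b/m/n,atol=1e-5))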

When w1, b1, w, and b are all initialized to zero, the output-layer deltas of the 6 samples sum exactly to 0, so by BP3 the sensitivity of the loss to the output bias is nabla_b = 0.

Because w = 0, BP2 cannot propagate delta back to the hidden layer, so by BP3 the hidden-layer bias sensitivity nabla_b1 = 0, and by BP4 the hidden-layer weight sensitivity nabla_w1 = 0.

Because the positive and negative output-layer deltas cancel exactly and the hidden activations y1 are all 0.5, by BP4 the output-layer weight sensitivity nabla_w = 0 as well.

So every gradient is zero and the network fails to learn!
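
The failure can be reproduced in a few lines of plain NumPy (a self-contained sketch; sigmoid and softmax are written out by hand, and the variable names are ad hoc):

import numpy as np

def sigmoid(z): return 1.0/(1.0+np.exp(-z))
def softmax(z):
    e=np.exp(z-z.max(axis=1,keepdims=True))
    return e/e.sum(axis=1,keepdims=True)

X=np.array([[1.,1.,0.,0.],[1.,0.,1.,0.],[1.,0.,0.,1.],
            [1.,0.,0.,0.],[0.,1.,0.,0.],[0.,0.,0.,1.]])
Y=np.array([[0.,1.],[0.,1.],[0.,1.],[1.,0.],[1.,0.],[1.,0.]])

w1=np.zeros((4,3)); b1=np.zeros(3)
w=np.zeros((3,2));  b=np.zeros(2)

y1=sigmoid(np.dot(X,w1)+b1)          # all 0.5
a=softmax(np.dot(y1,w)+b)            # all 0.5
delta=a-Y                            # rows are (+0.5,-0.5) or (-0.5,+0.5)
print(delta.sum(axis=0))             # nabla_b  = [0. 0.]
print(np.dot(y1.T,delta))            # nabla_w  = all zeros (deltas cancel)
delta1=np.dot(delta,w.T)*y1*(1-y1)   # BP2: w = 0 blocks the error signal
print(delta1.sum(axis=0))            # nabla_b1 = [0. 0. 0.]
print(np.dot(X.T,delta1))            # nabla_w1 = all zeros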

Changing the initial values of w1 and w to small truncated normals (as in the code above), with learning rate 5 and 100 training epochs, gives the result shown below, which essentially matches TensorFlow's tf.train.GradientDescentOptimizer(learning_rate).minimize(lossFun_crossEntropy) (the commented-out train_step lines in the code)!

[Figure: training cost curve of the manual BP updates, closely matching GradientDescentOptimizer]

With randomly generated w1 and w, training was run 3 times; the mean cross-entropy cost is shown below. In every run the cost rises at the very start of training, and fluctuations appear between roughly step 50 and step 70.

[Figure: mean cross-entropy cost for 3 runs with random initialization; fluctuations between steps 50 and 70]

The abnormal fluctuation is caused by a learning rate that is too large. Dividing the learning rate by 10 and re-examining steps 49 to 50 gives the cost below:

[Figure: cost from step 49 to step 50 with the learning rate divided by 10]

To look more closely at the jump from step 59 to step 60, the learning rate is fine-tuned: the cost is evaluated at fractional multiples (0, 0.5, ..., 5) of the averaged per-step weight increment between two saved weight snapshots. The code is as follows:

# coding=utf-8
import os  
os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # show only warnings and errors
 
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
 
logs_path=r'c:/temp/log_mnist_softmax'
learning_rate=5 # error is large when >0.05 (not actually used in this probe script)
training_epochs=1
 
trainData_in=np.array([[1.0,1.0,0.0,0.0],
              [1.0,0.0,1.0,0.0],
              [1.0,0.0,0.0,1.0],
              [1.0,0.0,0.0,0.0],
              [0.0,1.0,0.0,0.0],
              [0.0,0.0,0.0,1.0]])
trainData_out=np.array([[0.0,1.0],
               [0.0,1.0],
               [0.0,1.0],
               [1.0,0.0],
               [1.0,0.0],
               [1.0,0.0]])

testData_in=np.array([[0.0,0.0,0.0,1.0]])
testData_out=np.array([[1.0,0.0]])
 
print(np.shape(trainData_in))
print(np.shape(trainData_out))

x_input=tf.placeholder(tf.float32, [None,4], name='x_input')
y_desired=tf.placeholder(tf.float32,[None,2],name='y_desired')
#w1=tf.Variable(tf.zeros([4,3]),name='w1')
w1=tf.Variable(tf.truncated_normal([4,3],stddev=0.1),name='w1')
b1=tf.Variable(tf.zeros([3]),name='b1')
z1=tf.matmul(x_input,w1)+b1
y1=tf.nn.sigmoid(z1)

#w=tf.Variable(tf.zeros([3,2]),name='w')
w=tf.Variable(tf.truncated_normal([3,2],stddev=0.1),name='w')
b=tf.Variable(tf.zeros([2]),name='b')
z=tf.matmul(y1,w)+b
y_output=tf.nn.softmax(z,name='y_output')
lossFun_crossEntropy=-tf.reduce_mean(y_desired*tf.log(y_output)) # mean cross-entropy, averaged over all m*n entries

feed_dict_trainData={x_input:trainData_in,y_desired:trainData_out}
feed_dict_testData={x_input:testData_in,y_desired:testData_out}

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(training_epochs):
        #####
        # weight snapshot saved from an earlier training run (start point of the probe)
        w1_=np.array([[0.46067277,-1.9804744,1.5933108],
                      [0.17863363,-0.82081705,0.86146206],
                      [0.4359305,-1.3856853,1.3689806],
                      [0.1085812,-0.83159065,0.861873]])
        b1_=np.array([-0.71817315,1.857689,-1.7725247])
        w_=np.array([[-0.49652696,0.26705313],
                     [2.5914109,-2.7570033],
                     [-1.9809961,1.9674655]])
        b_=np.array([0.6382933,-0.63829315])

        # snapshot from the same run, 5 steps later (hence the /5 averaging below)
        w1_50=np.array([[0.53725445,-2.3688202,1.9239993],
                        [0.21249956,-1.0122195,1.0180486],
                        [0.45423114,-1.4623545,1.4372292],
                        [0.14236526,-1.0221516,1.0185666]])
        b1_50=np.array([-0.64476234,1.4880188,-1.4563937])
        w_50=np.array([[-0.68910146,0.45962757],
                       [2.5146284,-2.6802208],
                       [-2.266828,2.2532973]])
        b_50=np.array([0.25415757,-0.25415745])        
        
        # averaged per-step weight increment between the two snapshots
        nabla_w1=(w1_50-w1_)/5
        nabla_b1=(b1_50-b1_)/5
        nabla_w=(w_50-w_)/5
        nabla_b=(b_50-b_)/5
                
        cost_fi=np.zeros(11)
        fi=np.zeros(11)
        for i in range(11):
            # evaluate the cost at fractional steps 0, 0.5, ..., 5 along the increment
            # (i = 2 corresponds to one full gradient step)
            w1_50=w1_+nabla_w1*0.5*i
            b1_50=b1_+nabla_b1*0.5*i
            w_50=w_+nabla_w*0.5*i
            b_50=b_+nabla_b*0.5*i
            if False: # toggle: pin the weights to the starting snapshot instead
                update_w1=tf.assign(w1,w1_)
                update_b1=tf.assign(b1,b1_)
                update_w=tf.assign(w,w_)
                update_b=tf.assign(b,b_)
            else:
                update_w1=tf.assign(w1,w1_50)
                update_b1=tf.assign(b1,b1_50)
                update_w=tf.assign(w,w_50)
                update_b=tf.assign(b,b_50)
       
            print('w1\'=',sess.run(update_w1))
            print('b1\'=',sess.run(update_b1))
            print('w\'=',sess.run(update_w))
            print('b\'=',sess.run(update_b))
            
            fi[i]=i
            cost_fi[i]=sess.run(lossFun_crossEntropy,feed_dict=feed_dict_trainData)
            
        plt.plot(fi,cost_fi)
        plt.show()
        #####
    

[Figure: cost_fi versus i, the cost along the averaged update direction]
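
One refinement worth noting: the loop above creates fresh tf.assign ops on every iteration, which keeps adding nodes to the graph. A leaner pattern (a sketch; the *_ph placeholder names are hypothetical) builds the assign ops once and feeds new values through placeholders:

# Sketch: create assign ops once, feed values through placeholders.
w1_ph=tf.placeholder(tf.float32,w1.get_shape())
b1_ph=tf.placeholder(tf.float32,b1.get_shape())
w_ph=tf.placeholder(tf.float32,w.get_shape())
b_ph=tf.placeholder(tf.float32,b.get_shape())
assign_ops=[tf.assign(w1,w1_ph),tf.assign(b1,b1_ph),
            tf.assign(w,w_ph),tf.assign(b,b_ph)]

for i in range(11):
    sess.run(assign_ops,feed_dict={w1_ph:w1_+nabla_w1*0.5*i,
                                   b1_ph:b1_+nabla_b1*0.5*i,
                                   w_ph:w_+nabla_w*0.5*i,
                                   b_ph:b_+nabla_b*0.5*i})
    fi[i]=i
    cost_fi[i]=sess.run(lossFun_crossEntropy,feed_dict=feed_dict_trainData)

With this pattern the graph stays fixed, and the same probe can be rerun for any pair of snapshots.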
