Simulating TensorFlow Neural Network Training

Summary: (1) Implement a neural-network training example using Python and numpy, without TensorFlow or any other deep-learning framework. (2) Train the same example with TensorFlow. (3) Compare the parameters produced by the two training runs.

Using Python and the numpy package, this post simulates the training process of a simple deep-learning neural network, with mean squared error as the loss function: loss = (1/N) * Σ (y_ - y)^2. The computation graph is as follows:

[Figure: computation graph, x (2 inputs) -> w1 (2x3) -> a -> w2 (3x1) -> y]
This example predicts a student's English score from vocabulary size and reading volume. Part of the training input looks like this:

'''
X=[ Vocabulary , Reading]
Y_=[ English score ]
'''
TRAIN_THRESHOLD = 3  # a prediction within 3 points of the label counts as correct
X=[[1.23330274,7.40049697],
[2.10652012,5.33739393],
[0.1165997 ,9.18747008],
[7.20571883,0.33421428],
[7.65559469,1.37209321],
[2.27062682,6.06083184],
[7.55380109,8.52735541],
[0.01807387,5.21226027],
[4.41630107,4.85377414],

The corresponding outputs are:

Y_=[[43.16831799],
[44.52129707],
[38.03247702],
[80.59976427],
[89.69991445],
[49.22022243],
[100.],
[21.04785364],
[67.99440827],

w1 and w2 are initialized to [[1.0,1.0,1.0],[2.0,1.0,1.0]] and [[2.0],[3.0],[5.0]] respectively.
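To make the shapes concrete, here is a minimal standalone sketch (not part of the original script) that runs the initial forward pass on the first training sample; with these starting weights the prediction lands far above the label 43.17, which is exactly what training has to correct:

import numpy as np

w1 = np.array([[1.0, 1.0, 1.0], [2.0, 1.0, 1.0]])  # 2x3: [vocabulary, reading] -> 3 hidden units
w2 = np.array([[2.0], [3.0], [5.0]])               # 3x1: hidden units -> score

x = np.array([1.23330274, 7.40049697])  # first sample: [Vocabulary, Reading]
a1 = np.dot(x, w1)  # hidden layer, shape (3,)
y = np.dot(a1, w2)  # predicted score, shape (1,)
print(y)            # approximately [101.14]; the label is 43.16831799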


The learn() function trains one of the weight matrices. Each training pass works like this: first compute the loss with the current w1 and w2 (call it Loss1); then, one element at a time, add a tiny amount (wDelta = 0.0000001) to that element, e.g. w1(0,0) <- w1(0,0) + wDelta, and compute the new loss (Loss2). (Loss2 - Loss1)/wDelta is then the partial derivative of the loss with respect to that element (call it DeltaLoss, e.g. the partial derivative of the loss with respect to w1(0,0)). The trained value of w1(0,0) is w1(0,0) - learnRate * DeltaLoss; predicting with these updated parameters lowers the loss. Applying this procedure to every element of w1 and w2 and collecting the new values completes one round of training.
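The rule at work here is the forward-difference approximation f'(w) ≈ (f(w + δ) - f(w))/δ, followed by a plain gradient-descent step. A minimal standalone sketch (not part of the original script) on a one-variable toy loss:

def f(w):
    return (w - 3.0) ** 2  # toy loss with its minimum at w = 3

w = 0.0
learnRate, wDelta = 0.1, 0.0000001
for _ in range(100):
    grad = (f(w + wDelta) - f(w)) / wDelta  # forward-difference derivative, same idea as learn()
    w -= learnRate * grad                   # gradient-descent update
print(w)  # converges to roughly 3.0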

A quick tour of the functions: directLearnMain() is the overall driver of the simulated training. directValidateMain() performs validation, calling calAccury() to compute the accuracy. forwardnn() implements the forward pass, using numpy dot products for the matrix math. squareErrorAvg() computes the mean squared error.
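The listing below pulls X, Y_, XT, Y_T and the size constants from two helper modules (traindata and dataDim) that the post does not include. As a hypothetical reconstruction, inferred only from how the names are used (the reported accuracy 0.90625 = 29/32 suggests a validation set of 32 samples), dataDim.py might look like:

# dataDim.py -- hypothetical reconstruction, values inferred from usage in the post
SAMPLE_SIZE = 32    # number of training samples in X / Y_ (assumed)
VALIDATE_SIZE = 32  # number of validation samples in XT / Y_T (0.90625 = 29/32)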

import tensorflow as tf
import numpy as np
from traindata import *
from dataDim import *

BATCH_SIZE = 8
def matrixCopy(m, mNew):
    # copy every element of m into mNew (both matrices assumed to have the same shape)
    for r in range(len(m)):
        for c in range(len(m[r])):
            mNew[r][c] = m[r][c]
def forwardnn(xa, w1, w2, ya):
    # forward pass: for each sample x, y = (x . w1) . w2; predictions are written into ya
    i = 0
    for x in xa:
        a1 = np.dot(x, w1)
        y = np.dot(a1, w2)
        ya[i] = y
        i = i + 1
def squareErrorAvg(y_a, ya):
    # mean squared error between the labels y_a and the predictions ya
    e = 0.0
    yindex = 0
    for y_ in y_a:
        y = ya[yindex]
        e1 = (y_[0] - y[0]) * (y_[0] - y[0])
        e += e1
        yindex = yindex + 1
    e /= yindex
    return e

def learn(layer, w1, w2, wnew, xa, ya, y_a, learnRate, wDelta):
    forwardnn(xa, w1, w2, ya)
    e1 = squareErrorAvg(y_a, ya)
    if layer == 1:
        w = w1
    else:
        w = w2
    for r in range(len(wnew)):
        for c in range(len(wnew[r])):
            v = w[r][c]
            w[r][c] += wDelta
            forwardnn(xa, w1, w2, ya)
            e2 = squareErrorAvg(y_a, ya)
            learnN = (e2 - e1) / wDelta * learnRate  # (e2-e1)/wDelta: numerical partial derivative of the loss
            w[r][c] = v                              # restore the perturbed element
            wnew[r][c] = v - learnN                  # gradient-descent update

def calAccury(ya):  # ya is the list of predicted results
    ya = np.array(ya)
    y_a = np.array(Y_T)
    accury = np.array(ya - y_a)
    a = (abs(accury) < TRAIN_THRESHOLD)  # a prediction within the threshold counts as correct
    af = a.astype(np.float32)
    right = af.sum()
    per = right / VALIDATE_SIZE
    return per, a

def directValidateMain(w1, w2):
    xa = XT[0:VALIDATE_SIZE]
    ya = [[0.0]] * VALIDATE_SIZE
    forwardnn(xa, w1, w2, ya)
    per, a = calAccury(ya)
    #for i in range(len(ya)):
    #    print(xa[i], '=>', ya[i], '||', Y_T[i], a[i])
    return per

''' Main for training learned by myself '''
def directLearnMain():
    w1 = np.array([[1.0, 1.0, 1.0], [2.0, 1.0, 1.0]])
    w2 = np.array([[2.0], [3.0], [5.0]])
    Y = [[0.0]] * SAMPLE_SIZE
    STEPS = 3000
    w1new = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    w2new = [[0.0], [0.0], [0.0]]
    for i in range(STEPS):
        start = (i * BATCH_SIZE) % SAMPLE_SIZE
        end = start + BATCH_SIZE
        xa = X[start:end]
        ya = Y[start:end]
        y_a = Y_[start:end]
        learn(1, w1, w2, w1new, xa, ya, y_a, 0.001, 0.0000001)
        learn(2, w1, w2, w2new, xa, ya, y_a, 0.001, 0.0000001)
        matrixCopy(w1new, w1)
        matrixCopy(w2new, w2)
        if (i + 1) % 10000 == 0:
            forwardnn(xa, w1, w2, ya)
            e = squareErrorAvg(y_a, ya)
            print('w1 learned by myself @time(s) ', i + 1, ' : ', w1new)
            print('w2 learned by myself @time(s) ', i + 1, ' : ', w2new)
            print('loss by myself @time(s) ', i + 1, ' : ', e)
    print("Training result (myself):")
    print("w1:", w1)
    print("w2:", w2)
    per = directValidateMain(w1, w2)
    print("Accuracy by myself:", per)
    return w1, w2
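As a side note, the per-sample loops in forwardnn() and squareErrorAvg() are equivalent to single vectorized numpy expressions; here is a minimal sketch (not part of the original script) that can be used to cross-check the hand-rolled versions:

def forwardnn_vec(xa, w1, w2):
    # whole-batch forward pass: (N,2) . (2,3) . (3,1) -> (N,1)
    return np.dot(np.dot(np.array(xa), w1), w2)

def mseVec(y_a, ya):
    # same quantity squareErrorAvg() computes, in one expression
    return np.mean((np.array(y_a) - np.array(ya)) ** 2)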

The code that trains the same example directly with TensorFlow follows; it was adapted from tf3_6.py in https://github.com/cj0012/AI-Practice-Tensorflow-Notes/blob/master/tf.zip .

''' main for training using tensorflow '''
def tensorflowMain():
    #1 Define the network's input, parameters and output, and the forward pass.
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))

    w1 = tf.Variable([[1.0, 1.0, 1.0], [2.0, 1.0, 1.0]])
    w2 = tf.Variable([[2.0], [3.0], [5.0]])

    a = tf.matmul(x, w1)
    y = tf.matmul(a, w2)

    #2 Define the loss function and the back-propagation method.
    loss_mse = tf.reduce_mean(tf.square(y - y_))
    train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss_mse)
    #3 Create a session and run STEPS training rounds.
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # Train the model.
        STEPS = 3000
        for i in range(STEPS):
            start = (i*BATCH_SIZE) % SAMPLE_SIZE 
            end =  start + BATCH_SIZE
            sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
            if  (i+1) % 10000 == 0  :
                total_loss = sess.run(loss_mse, feed_dict={x: X[start:end], y_: Y_[start:end]})                
                print ("w1 after trained " , i+1 , " time(s)  by tensorflow:\n", sess.run(w1))
                print ("w2 after trained " , i +1, " time(s)  by tensorflow:\n", sess.run(w2))
                print("After %d training step(s), loss_mse  is %g" % (i+1, total_loss))
        print( "训练结果(Tensorflow):" )
        r_w1 = sess.run(w1)
        r_w2 = sess.run(w2)
        print("w1:", r_w1  )
        print("w2:", r_w2 )

        #validate
        rv = sess.run( y , feed_dict={x:XT } )
        per,accAr = calAccury( rv )
        print("Tensorflow accury:" , per  )       
        
        return r_w1 , r_w2 
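Note that this listing uses the TensorFlow 1.x API (tf.placeholder, tf.Session). If only TensorFlow 2.x is installed, the usual workaround (a sketch, not verified against this exact script) is to run it through the v1 compatibility layer:

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()  # restores graph-mode semantics so placeholder/Session work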

The main function follows. It sums the absolute element-wise differences between the w1 and w2 returned by the two methods, to check whether the two kinds of training produce significantly different results.

w11, w12 = directLearnMain()  # w1, w2 from the hand-rolled training
w21, w22 = tensorflowMain()   # w1, w2 from TensorFlow
w1diff = w11 - w21
w2diff = w12 - w22
d1s = np.sum(abs(w1diff))
d2s = np.sum(abs(w2diff))
print("w1 diff sum", d1s)
print("w2 diff sum", d2s)

Below is the output of one run. The w1 and w2 learned by the two methods are almost identical; residual differences on the order of 1e-5 are expected, since the hand-rolled version approximates each gradient by a forward difference while TensorFlow computes analytic gradients in float32.

Training result (myself):
w1: [[1.1226987  1.88681821 2.74459457]
 [0.73658331 0.59952774 0.8687756 ]]
w2: [[1.28324997]
 [1.67933366]
 [2.44031197]]
Accuracy by myself: 0.90625
Training result (Tensorflow):
w1: [[1.1226883  1.8868158  2.7446    ]
 [0.73657435 0.59952295 0.86878556]]
w2: [[1.2832366]
 [1.6793287]
 [2.4403226]]
Tensorflow accuracy: 0.90625
w1 diff sum 4.2017758479784284e-05
w2 diff sum 2.8995841911072517e-05
