TensorFlow provides two functions that apply softmax and then compute the cross entropy:
1 softmax_cross_entropy_with_logits_v2
2 sparse_softmax_cross_entropy_with_logits
Both take two arguments, logits and labels. logits is usually the raw output of the network, a 2-D tensor of shape batch_size x num_classes. labels holds the known annotations: for the first function it is a dense (one-hot) tensor with the same shape as logits, while for the second function it is the sparse representation of the labels, i.e. a 1-D tensor of class indices.
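As a quick illustration of the two label formats (a small sketch with made-up labels, not taken from the code below), a one-hot row is converted to its sparse index with argmax:

import numpy as np

# dense (one-hot) labels for a hypothetical batch of 3 samples, 3 classes
dense_labels = np.array([[0, 0, 1],
                         [0, 1, 0],
                         [1, 0, 0]])
# sparse labels expected by sparse_softmax_cross_entropy_with_logits:
# one class index per sample
sparse_labels = np.argmax(dense_labels, axis=1)
print(sparse_labels)   # [2 1 0]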
Let logits1 = [[0.2, 0.1, 0.9]] and labels1 = [[0, 0, 1]]. softmax_cross_entropy_with_logits_v2(logits=logits1, labels=labels1) is computed as follows:
apply softmax to [0.2, 0.1, 0.9], which gives sm = [0.25519383 0.23090893 0.51389724], and then
cross entropy = -( log(sm[0])*labels1[0][0] + log(sm[1])*labels1[0][1] + log(sm[2])*labels1[0][2] ).
The code example below computes the cross entropy in three ways: with each of the two functions above, and by calling softmax ourselves and then evaluating the formula directly. All three results agree.
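Before the full TensorFlow program, here is a small plain-Python check of that hand computation (a sketch added for illustration; the numbers match the first row of the output further below):

import math

logits_row = [0.2, 0.1, 0.9]
labels_row = [0, 0, 1]

# softmax: exponentiate and normalize
exps = [math.exp(v) for v in logits_row]
total = sum(exps)
sm = [e / total for e in exps]              # ~[0.2552, 0.2309, 0.5139]

# cross entropy against the one-hot label
ce = -sum(l * math.log(p) for l, p in zip(labels_row, sm))
print(sm, ce)                               # ce ~ 0.6657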
import tensorflow as tf
import math
input_data = tf.Variable([ [0.2, 0.1, 0.9] , [0.2, 0.1, 0.9], [0.7, 0.4, 0.1]], dtype=tf.float32)
labels1=[[0, 0, 1], [0, 1, 0] , [1, 0, 0]]
output = tf.nn.softmax_cross_entropy_with_logits_v2(logits=input_data, labels=labels1 )
labels2 = tf.argmax(labels1,1 )
output1 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=input_data, labels=labels2 )
sm = tf.nn.softmax ( input_data )
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    our = sess.run(output)
    print('softmax_cross_entropy_with_logits_v2:\n', our)
    our1 = sess.run(output1)
    print('sparse_softmax_cross_entropy_with_logits:\n', our1)
    inp = sess.run(input_data)
    smv = sess.run(sm)
    print('softmax output:\n', smv)
    l2 = sess.run(labels2)
    print('argmax output(sparse labels)', l2)
    # recompute the cross entropy by hand from the softmax output
    for k in range(len(inp)):
        x = 0.0
        for i in range(len(inp[0])):
            x += math.log(smv[k][i]) * labels1[k][i]
        x = -x
        print('softmax_cross_entropy_with_logits by my computing for input ', k, ':', x)
        #print('tensorflow == myComputing?', math.fabs(x - our[k]) < 0.001)
Output:
softmax_cross_entropy_with_logits_v2:
[0.6657319 1.4657319 0.8283902]
sparse_softmax_cross_entropy_with_logits:
[0.6657319 1.4657319 0.8283902]
softmax output:
[[0.25519383 0.23090893 0.51389724]
[0.25519383 0.23090893 0.51389724]
[0.4367518 0.3235537 0.23969449]]
argmax output(sparse labels) [2 1 0]
softmax_cross_entropy_with_logits by my computing for input 0 : 0.6657319553780278
softmax_cross_entropy_with_logits by my computing for input 1 : 1.4657318873121323
softmax_cross_entropy_with_logits by my computing for input 2 : 0.8283901795553891
There are four ways to compute the cross entropy below. Under normal conditions they all give the same value, but when extremely small values are encountered they do not:
#4 methods to calculate cross entropy: the first applies no clipping to ynew,
#the second clips ynew manually,
#the third/fourth clip internally, but the exact clipping range is not documented
cross_entropy1 = -tf.reduce_mean( tf.reduce_sum( y_*tf.log(ynew), 1 ) )
ynew1 = tf.clip_by_value(ynew,1e-36,1.0)
cross_entropy2 = -tf.reduce_mean( tf.reduce_sum( y_*tf.log(ynew1), 1 ) )
cross_entropy3 = tf.reduce_mean (tf.nn.softmax_cross_entropy_with_logits_v2( logits = y3n , labels=y_ ) )
cross_entropy4 = tf.reduce_mean (tf.nn.sparse_softmax_cross_entropy_with_logits(\
logits = y3n , labels=tf.argmax(y_,1) ) )
cross_entropy1 overflows (it yields nan), and cross_entropy3, which uses softmax_cross_entropy_with_logits_v2, also produces an extremely large cross entropy in this extreme case (6.72853e+30). With cross_entropy2, where the components of the prediction are clipped to the range (1e-36, 1.0), the cross entropy stays at a value that training can still work with (82.89307). That number is simply -log(1e-36) ≈ 82.893: the true class received a predicted probability of 0, and the clip replaces it by the lower bound 1e-36.
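A quick sanity check of that clipped value (a small sketch added here, plain Python rather than part of the TensorFlow graph):

import math

# The true class got predicted probability 0; clip_by_value replaces it by 1e-36,
# so the per-sample cross entropy becomes -log(1e-36).
print(-math.log(1e-36))   # ~82.893, matching crs2 = 82.89307 below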
See the code below:
#file crossEntroyTest.py
import tensorflow as tf
import math
print('exp(lar)' , math.exp( - 6.72853e+30 ) )
print(1.2e+3)
def testCrossEntropyNormalData():
    y_ = tf.Variable([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
                      [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]])
    ynew = tf.Variable([[0.09775153, 0.10184532, 0.10208829, 0.0985443,
                         0.09175218, 0.10402172,
                         0.10551486, 0.09878179, 0.10147586, 0.09822416],
                        [0.09767619, 0.10240721, 0.09688035, 0.09931276,
                         0.09407537, 0.10758881,
                         0.10283026, 0.09836568, 0.10210507, 0.09875835]])  # ynew = tf.nn.softmax(y3n),
    # calculated by englishHndUsingCnn.py in NN training
    y3n = tf.Variable([[-0.02758069, 0.01344561, 0.01582842,
                        -0.0195034, -0.09091826, 0.03459012,
                        0.04884226, -0.01709634, 0.00981142, -0.02275741],
                       [-0.02703298, 0.02026634, -0.03521407, -0.0104167,
                        -0.06459457, 0.06962582,
                        0.02438892, -0.01999882, 0.01731157, -0.01601485]])
    cross_entropy = -tf.reduce_mean(tf.reduce_sum(y_ * tf.log(ynew), 1))
    cross_entropy1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y3n, labels=y_))
    cross_entropy2 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y3n, labels=tf.argmax(y_, 1)))
    ynews = tf.nn.softmax(y3n)
    sum1 = tf.reduce_mean(tf.reduce_sum(ynews, 1))
    # check that softmax(y3n) reproduces ynew element-wise (within 1e-6)
    equalBool = tf.subtract(ynews, ynew)
    equalBool = tf.abs(equalBool)
    equalBool = tf.less(equalBool, 0.000001)
    equalFloat = tf.cast(equalBool, 'float32')
    equNum = tf.reduce_sum(equalFloat)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        r = sess.run(cross_entropy)
        print('crs', r)
        print('crs1', sess.run(cross_entropy1))
        print('crs2', sess.run(cross_entropy2))
        ynewsR = sess.run(ynews)
        print('ynews:', ynewsR)
        print('SoftMax', sess.run(sum1))
        print('soft equal ?', sess.run(equNum))
'''
crs 2.3614712
crs1 2.3614712
crs2 2.3614712
ynews: [[0.09775153 0.10184532 0.10208829 0.0985443 0.09175218 0.10402172
0.10551486 0.09878179 0.10147586 0.09822416]
[0.09767619 0.10240721 0.09688035 0.09931276 0.09407537 0.10758881
0.10283026 0.09836569 0.10210507 0.09875835]]
SoftMax 1.0
soft equal ? 20.0
'''
def testCrossEntropyAbnormalData():
    y_ = tf.Variable([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
                      [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])
    ynew = tf.Variable([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
                        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]])
    y3n = tf.Variable([[-2.4375510e+23, 2.0621505e+22, -7.2626525e+23, 1.5664488e+31,
                        3.5969707e+24, 1.5914237e+31, -3.2679661e+22, 2.5958722e+24,
                        5.2547755e+23, -3.1578729e+31],
                       [-2.0226255e+23, 1.7113962e+22, -6.0270867e+23, 1.3000047e+31,
                        2.9850567e+24, 1.3207314e+31, -2.7118364e+22, 2.1541442e+24,
                        4.3611144e+23, -2.6207365e+31]])
    # 4 methods to calculate cross entropy: the first applies no clipping to ynew,
    # the second clips ynew manually,
    # the third/fourth clip internally, but the exact clipping range is not documented
    cross_entropy1 = -tf.reduce_mean(tf.reduce_sum(y_ * tf.log(ynew), 1))
    ynew1 = tf.clip_by_value(ynew, 1e-36, 1.0)
    cross_entropy2 = -tf.reduce_mean(tf.reduce_sum(y_ * tf.log(ynew1), 1))
    cross_entropy3 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y3n, labels=y_))
    cross_entropy4 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y3n, labels=tf.argmax(y_, 1)))
    ynews = tf.nn.softmax(y3n)
    sum1 = tf.reduce_mean(tf.reduce_sum(ynews, 1))
    # check that softmax(y3n) reproduces ynew element-wise (within 1e-6)
    equalBool = tf.subtract(ynews, ynew)
    equalBool = tf.abs(equalBool)
    equalBool = tf.less(equalBool, 0.000001)
    equalFloat = tf.cast(equalBool, 'float32')
    equNum = tf.reduce_sum(equalFloat)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print('crs1', sess.run(cross_entropy1))
        print('crs2', sess.run(cross_entropy2))
        print('crs3', sess.run(cross_entropy3))
        print('crs4', sess.run(cross_entropy4))
        ynewsR = sess.run(ynews)
        print('ynews:', ynewsR)
        print('SoftMax', sess.run(sum1))
        print('soft equal ?', sess.run(equNum))
'''
crs1 nan
crs2 82.89307
crs3 6.72853e+30
crs4 6.72853e+30
ynews: [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
SoftMax 1.0
soft equal ? 20.0
'''
testCrossEntropyNormalData()
testCrossEntropyAbnormalData()
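One more sketch, not part of the original script: if you want to compute the cross entropy by hand (for example to apply your own weighting) without hitting nan from log(0), you can work from tf.nn.log_softmax instead of taking the log of a separately computed softmax. This avoids the nan in the same way the built-in ops do; with logits as extreme as in the abnormal test it still produces a very large value, just like crs3/crs4, so clipping or fixing the exploding logits remains necessary.

# assumes y_ and y3n are defined as in the functions above
log_p = tf.nn.log_softmax(y3n)
cross_entropy_stable = -tf.reduce_mean(tf.reduce_sum(y_ * log_p, 1))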