炼数成金 TensorFlow Learning Notes 4.1: Cross-Entropy
Code and analysis
"""
Created on Fri Mar 20 09:57:30 2020
@author: 寒火qwer
"""
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('.\MNIST_data', one_hot=True)
batch_size = 100
n_batch = mnist.train.num_examples // batch_size
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
w = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))
p = tf.matmul(x,w) + b
prediction = tf.nn.softmax(p)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=p))
train_op = tf.train.GradientDescentOptimizer(0.2).minimize(loss)
init_op = tf.global_variables_initializer()
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(prediction, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.Session() as sess:
sess.run(init_op)
for epoch in range(21):
for batch in range(n_batch):
batch_x, batch_y = mnist.train.next_batch(batch_size)
sess.run(train_op, feed_dict={x:batch_x, y:batch_y})
acc = sess.run(accuracy, feed_dict= {x:mnist.test.images, y:mnist.test.labels})
print("iter" + str(epoch) + ", testing acc: " + str(acc))
'''
Output:
Extracting .\MNIST_data\train-images-idx3-ubyte.gz
Extracting .\MNIST_data\train-labels-idx1-ubyte.gz
Extracting .\MNIST_data\t10k-images-idx3-ubyte.gz
Extracting .\MNIST_data\t10k-labels-idx1-ubyte.gz
iter0, testing acc: 0.9113
iter1, testing acc: 0.9179
iter2, testing acc: 0.9202
iter3, testing acc: 0.9194
iter4, testing acc: 0.9214
iter5, testing acc: 0.9198
iter6, testing acc: 0.922
iter7, testing acc: 0.9234
iter8, testing acc: 0.9229
iter9, testing acc: 0.9232
iter10, testing acc: 0.9224
iter11, testing acc: 0.9243
iter12, testing acc: 0.9239
iter13, testing acc: 0.9243
iter14, testing acc: 0.924
iter15, testing acc: 0.9228
iter16, testing acc: 0.9249
iter17, testing acc: 0.9225
iter18, testing acc: 0.9227
iter19, testing acc: 0.9228
iter20, testing acc: 0.925
'''
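One detail in the script above deserves emphasis: the loss is built from the raw logits p, not from prediction. Because softmax_cross_entropy_with_logits applies softmax internally, feeding it already-softmaxed probabilities runs softmax twice and quietly distorts the loss. A minimal sketch of the pitfall (TF 1.x; the example tensors are made up for illustration):

import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.1]])   # raw scores from the last layer
labels = tf.constant([[1.0, 0.0, 0.0]])

# Correct: pass raw logits; the op applies softmax itself.
right = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)

# Wrong: probabilities go in, so softmax gets applied a second time.
wrong = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                logits=tf.nn.softmax(logits))

with tf.Session() as sess:
    print(sess.run(right))  # ~[0.417]
    print(sess.run(wrong))  # ~[0.802] -- a different, misleading loss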
- tf.nn.softmax_cross_entropy_with_logits(_sentinel=None, labels=None, logits=None, dim=-1, name=None)
Purpose: defines the loss for multi-class classification problems.
labels: the ground-truth labels of the training data; each row is a probability distribution (one-hot encodings can be passed in directly).
logits: the model's predictions as unscaled scores (i.e. they need not sum to 1). The function applies softmax internally, so pass the raw output of the final fully connected layer without running softmax on it first.
import tensorflow as tf

labels = [[0.0, 1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]
logits = [[3.0, 7.8, 0.2], [2.5, 0.3, 0.6], [2.7, 3.6, 5.4]]
logits_scaled = tf.nn.softmax(logits)

# The built-in op versus cross-entropy computed by hand; the two agree.
loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
loss1 = -tf.reduce_sum(labels * tf.log(logits_scaled), 1)

with tf.Session() as sess:
    print(sess.run(loss))
    print(sess.run(loss1))
'''
[ 0.00869229 0.23140666 0.20904817]
[ 0.00869226 0.23140666 0.2090482 ]
'''
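The two results agree here, but the fused op is the safer choice in practice: with extreme logits, the hand-rolled version underflows softmax to exactly 0 and then takes log(0), while the fused op computes log-softmax in a numerically stable way. A small sketch of the failure mode (values deliberately chosen to trigger the underflow):

import tensorflow as tf

# A very confident, wrong prediction: softmax underflows to [1, 0, 0].
labels = [[0.0, 1.0, 0.0]]
logits = [[1000.0, 0.0, 0.0]]

stable = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
manual = -tf.reduce_sum(labels * tf.log(tf.nn.softmax(logits)), 1)

with tf.Session() as sess:
    print(sess.run(stable))  # [1000.] -- the true cross-entropy
    print(sess.run(manual))  # nan/inf -- log(0) poisons the sum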
- tf.nn.sparse_softmax_cross_entropy_with_logits(_sentinel=None, labels=None, logits=None, name=None)
Purpose: defines the loss for multi-class classification problems.
The only difference from the previous function is labels: here each label is an exclusive class index (0, 1, 2, 3, ...) rather than a one-hot-style probability vector.
import tensorflow as tf

labels = [1, 0, 2]  # one class index per sample
logits = [[3.0, 7.8, 0.2], [2.5, 0.3, 0.6], [2.7, 3.6, 5.4]]
labels_one_hot = [[0.0, 1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]
logits_scaled = tf.nn.softmax(logits)

# The sparse op on class indices versus hand-rolled cross-entropy on one-hot labels.
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
loss1 = -tf.reduce_sum(labels_one_hot * tf.log(logits_scaled), 1)

with tf.Session() as sess:
    print(sess.run(loss))
    print(sess.run(loss1))
'''
[ 0.00869229 0.23140666 0.20904817]
[ 0.00869226 0.23140666 0.2090482 ]
'''
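The two label formats are interchangeable: tf.one_hot converts class indices into one-hot vectors, so the sparse op on indices and the dense op on tf.one_hot(labels, depth) produce the same per-sample losses. A quick sketch (TF 1.x):

import tensorflow as tf

labels = [1, 0, 2]
logits = [[3.0, 7.8, 0.2], [2.5, 0.3, 0.6], [2.7, 3.6, 5.4]]

sparse = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
# tf.one_hot yields float32 one-hot rows, which is what the dense op expects.
dense = tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(labels, depth=3),
                                                logits=logits)

with tf.Session() as sess:
    print(sess.run(sparse))  # [0.00869229 0.23140666 0.20904817]
    print(sess.run(dense))   # the same values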