# Note: the original version of this XOR script trained to all-NaN values
# (cause: softmax over a single output unit — see the fix below).
# XOR with a 2-unit hidden layer and a single sigmoid output.
#
# BUG FIX (source of the NaN loss): the original applied tf.nn.softmax to a
# single logit. Softmax over one value is identically 1.0, so the
# (1 - n_output) * tf.log(1 - output) term evaluated log(0) = -inf and the
# loss/gradients became NaN on the first step. A one-unit binary classifier
# needs a sigmoid, not a softmax. The loss is also reduced to a scalar and
# the probabilities are clipped so tf.log can never see 0 or 1.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.set_random_seed(55)
np.random.seed(55)

# XOR truth table: 4 samples, 2 inputs, 1 binary target.
input_data = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
output_data = [[0.], [1.], [1.], [0.]]

hidden_nodes = 2

n_input = tf.placeholder(tf.float32, shape=[None, 2], name="n_input")
n_output = tf.placeholder(tf.float32, shape=[None, 1], name="n_output")

# Hidden layer: 2 -> hidden_nodes, sigmoid activation.
b_hidden = tf.Variable(0.1, name="hidden_bias")
W_hidden = tf.Variable(tf.random_normal([2, hidden_nodes]), name="hidden_weights")
hidden = tf.sigmoid(tf.matmul(n_input, W_hidden) + b_hidden)

# Output layer: hidden_nodes -> 1 logit.
W_output = tf.Variable(tf.random_normal([hidden_nodes, 1]), name="output_weights")
b_output = tf.Variable(0.1, name="output_bias")
y = tf.matmul(hidden, W_output) + b_output

# Sigmoid (NOT softmax) turns the single logit into P(class == 1).
output = tf.sigmoid(y)

# Binary cross-entropy, averaged over the batch. Clipping keeps the
# probabilities strictly inside (0, 1) even if the network saturates.
eps = 1e-7
clipped = tf.clip_by_value(output, eps, 1.0 - eps)
loss = -tf.reduce_mean(n_output * tf.log(clipped)
                       + (1 - n_output) * tf.log(1 - clipped))

optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(0, 2001):
        cvalues = sess.run([train, loss, W_hidden, b_hidden, W_output],
                           feed_dict={n_input: input_data, n_output: output_data})
        if epoch % 200 == 0:
            print("")
            print("step: {:>3}".format(epoch))
            print("loss: {}".format(cvalues[1]))
    # Show the trained network's prediction for every XOR input.
    print("")
    for sample in input_data:
        print("input: {} | output: {}".format(
            sample, sess.run(output, feed_dict={n_input: [sample]})))
# The softmax cross-entropy needs one-hot labels. Three possible fixes:
#   1. convert n_output to a one-hot encoding (done in the code below);
#   2. use tf.nn.sparse_softmax_cross_entropy_with_logits with integer labels;
#   3. change the loss to mean squared error.
# XOR with one-hot targets and a 2-unit softmax output
# (class 0 = "XOR is 0", class 1 = "XOR is 1").
#
# Fixes over the original: the dead `output = tf.nn.relu(...)` assignment
# (it was immediately overwritten by the softmax) is removed, and the
# probabilities are clipped before tf.log so a saturated softmax can never
# produce log(0) = -inf.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.set_random_seed(55)
np.random.seed(55)

input_data = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
output_data = [[1, 0], [0, 1], [0, 1], [1, 0]]  # one-hot XOR labels

hidden_nodes = 2

n_input = tf.placeholder(tf.float32, shape=[None, 2], name="n_input")
n_output = tf.placeholder(tf.float32, shape=[None, 2], name="n_output")

# Hidden layer: 2 -> hidden_nodes, sigmoid activation.
b_hidden = tf.Variable(0.1, name="hidden_bias")
W_hidden = tf.Variable(tf.random_normal([2, hidden_nodes]), name="hidden_weights")
hidden = tf.sigmoid(tf.matmul(n_input, W_hidden) + b_hidden)

# Output layer: hidden_nodes -> 2 logits.
W_output = tf.Variable(tf.random_normal([hidden_nodes, 2]), name="output_weights")
b_output = tf.Variable([0.1, 0.1], name="output_bias")
y = tf.matmul(hidden, W_output) + b_output

# With two output units the softmax probabilities stay strictly inside
# (0, 1), unlike the single-unit case that produced NaN.
output = tf.nn.softmax(y)

# Cross-entropy against the one-hot targets; clip so tf.log never sees 0.
eps = 1e-7
clipped = tf.clip_by_value(output, eps, 1.0 - eps)
loss = -tf.reduce_sum(n_output * tf.log(clipped)
                      + (1 - n_output) * tf.log(1 - clipped))

optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(0, 2001):
        cvalues = sess.run([train, loss, W_hidden, b_hidden, W_output],
                           feed_dict={n_input: input_data, n_output: output_data})
        if epoch % 200 == 0:
            print("")
            print("step: {:>3}".format(epoch))
            print("loss: {}".format(cvalues[1]))
    # Show the trained per-class probabilities for every XOR input.
    print("")
    for sample in input_data:
        print("input: {} | output: {}".format(
            sample, sess.run(output, feed_dict={n_input: [sample]})))