Training weights come out as nan

When training a network on the XOR problem, all of the trained values come out as nan.

# -*- coding: utf-8 -*-

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.set_random_seed(55)
np.random.seed(55)

input_data = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]  # XOR input
output_data = [[0.], [1.], [1.], [0.]]  # XOR output


hidden_nodes = 2

n_input = tf.placeholder(tf.float32, shape=[None, 2], name="n_input")
n_output = tf.placeholder(tf.float32, shape=[None, 1], name="n_output")

# hidden layer's bias neuron
b_hidden = tf.Variable(0.1, name="hidden_bias")


W_hidden = tf.Variable(tf.random_normal([2, hidden_nodes]), name="hidden_weights")

hidden = tf.sigmoid(tf.matmul(n_input, W_hidden) + b_hidden)

################
# output layer #
################
W_output = tf.Variable(tf.random_normal([hidden_nodes, 1]), name="output_weights")  # output layer's weight matrix


# the bias initialisation does not affect the result
b_output = tf.Variable(0.1, name="output_bias")




# a ReLU output layer was also tried -- everything still came out nan:
# output = tf.nn.relu(tf.matmul(hidden, W_output) + b_output)

# softmax output layer
y = tf.matmul(hidden, W_output) + b_output
output = tf.nn.softmax(y)  # softmax of a single unit is always exactly 1.0



# cross-entropy loss; because output == 1.0 everywhere, tf.log(1 - output)
# evaluates to log(0) = -inf and the loss turns into nan
loss = -(n_output * tf.log(output) + (1 - n_output) * tf.log(1 - output))


optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)  # let the optimizer train

#####################
# train the network #
#####################
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(0, 2001):
        # run the training operation
        cvalues = sess.run([train, loss, W_hidden, b_hidden, W_output],
                           feed_dict={n_input: input_data, n_output: output_data})

        # print some debug stuff
        if epoch % 200 == 0:
            print("")
            print("step: {:>3}".format(epoch))
            print("loss: {}".format(cvalues[1]))
            # print("b_hidden: {}".format(cvalues[3]))
            # print("W_hidden: {}".format(cvalues[2]))
            # print("W_output: {}".format(cvalues[4]))


    print("")
    print("input: {} | output: {}".format(input_data[0], sess.run(output, feed_dict={n_input: [input_data[0]]})))
    print("input: {} | output: {}".format(input_data[1], sess.run(output, feed_dict={n_input: [input_data[1]]})))
    print("input: {} | output: {}".format(input_data[2], sess.run(output, feed_dict={n_input: [input_data[2]]})))
    print("input: {} | output: {}".format(input_data[3], sess.run(output, feed_dict={n_input: [input_data[3]]})))

Softmax cross-entropy needs one-hot targets here: with a single output unit, softmax always returns exactly 1.0, so tf.log(1 - output) evaluates to log(0) and the loss becomes nan. There are three ways to fix it: 1. convert n_output to one-hot encoding; 2. use sparse_softmax_cross_entropy_with_logits on integer class labels; 3. change the loss function to MSE. Sketches of fixes 2 and 3 follow below; the complete one-hot version (fix 1) comes after them.
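For reference, minimal sketches of fixes 2 and 3, reusing the hidden layer from the code above (the placeholder name labels and its shape are my assumptions, not part of the original code):

# Fix 2 (sketch): sparse softmax cross-entropy on integer class labels.
# The logits need 2 output units; labels are plain 0/1 ints, no one-hot needed.
labels = tf.placeholder(tf.int64, shape=[None], name="labels")  # feed [0, 1, 1, 0]
y = tf.matmul(hidden, W_output) + b_output                      # W_output: [hidden_nodes, 2]
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=y))

# Fix 3 (sketch): mean squared error on a single sigmoid output.
# Note: keep sigmoid (not softmax) on the single unit, otherwise the
# output is the constant 1.0 and there is no gradient to learn from.
output = tf.sigmoid(tf.matmul(hidden, W_output) + b_output)     # W_output: [hidden_nodes, 1]
loss = tf.reduce_mean(tf.square(n_output - output))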

# -*- coding: utf-8 -*-
"""

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.set_random_seed(55)
np.random.seed(55)

input_data = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]  # XOR input
output_data = [[1., 0.], [0., 1.], [0., 1.], [1., 0.]]  # XOR output, one-hot encoded


hidden_nodes = 2

n_input = tf.placeholder(tf.float32, shape=[None, 2], name="n_input")
n_output = tf.placeholder(tf.float32, shape=[None, 2], name="n_output")

# hidden layer's bias neuron
b_hidden = tf.Variable(0.1, name="hidden_bias")


W_hidden = tf.Variable(tf.random_normal([2, hidden_nodes]), name="hidden_weights")

hidden = tf.sigmoid(tf.matmul(n_input, W_hidden) + b_hidden)

################
# output layer #
################
W_output = tf.Variable(tf.random_normal([hidden_nodes, 2]), name="output_weights")  # output layer's weight matrix


# the bias initialisation does not affect the result
b_output = tf.Variable([0.1, 0.1], name="output_bias")




# a ReLU output layer was also tried -- it still produced nan:
# output = tf.nn.relu(tf.matmul(hidden, W_output) + b_output)

# softmax over two units: the probabilities are no longer pinned at
# exactly 1.0, so both log terms in the cross-entropy stay finite
y = tf.matmul(hidden, W_output) + b_output
output = tf.nn.softmax(y)



# cross-entropy over the one-hot targets, summed over examples and classes
loss = -tf.reduce_sum(n_output * tf.log(output) + (1 - n_output) * tf.log(1 - output))


optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)  # let the optimizer train

#####################
# train the network #
#####################
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(0, 2001):
        # run the training operation
        cvalues = sess.run([train, loss, W_hidden, b_hidden, W_output],
                           feed_dict={n_input: input_data, n_output: output_data})

        # print some debug stuff
        if epoch % 200 == 0:
            print("")
            print("step: {:>3}".format(epoch))
            print("loss: {}".format(cvalues[1]))
            # print("b_hidden: {}".format(cvalues[3]))
            # print("W_hidden: {}".format(cvalues[2]))
            # print("W_output: {}".format(cvalues[4]))


    print("")
    print("input: {} | output: {}".format(input_data[0], sess.run(output, feed_dict={n_input: [input_data[0]]})))
    print("input: {} | output: {}".format(input_data[1], sess.run(output, feed_dict={n_input: [input_data[1]]})))
    print("input: {} | output: {}".format(input_data[2], sess.run(output, feed_dict={n_input: [input_data[2]]})))
    print("input: {} | output: {}".format(input_data[3], sess.run(output, feed_dict={n_input: [input_data[3]]})))
