The loss function is the standard multi-class cross-entropy
import numpy as np

def getRandomIndex(n, x):
    # Index range is [0, n); draw x distinct indices.
    # Note: replace=False samples without repetition; replace=True may repeat.
    index = np.random.choice(np.arange(n), size=x, replace=False)
    return index
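A quick demonstration of the replace flag (a minimal sketch; the printed values vary because no seed is set):

idx = getRandomIndex(10, 5)
print(idx)                        # e.g. [7 2 9 0 4] -- five distinct indices
print(len(set(idx)) == len(idx))  # True: replace=False guarantees no repeats
# With replace=True the same index can be drawn more than once:
print(np.random.choice(np.arange(10), size=5, replace=True))  # e.g. [3 3 8 1 3]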
from keras.datasets import mnist
from keras.utils import np_utils
import numpy as np
import matplotlib.pyplot as plt

(train_x, train_y), (test_x, test_y) = mnist.load_data()

# Show 16 randomly chosen training images with their labels.
for i in range(16):
    plt.subplot(4, 4, i + 1)
    a = np.random.randint(0, 60000)
    plt.imshow(train_x[a], cmap='gray', interpolation=None)
    plt.title("Class {}".format(train_y[a]))
plt.show()
train_x = train_x / 255.0
test_x = test_x / 255.0
train_x = train_x.reshape(60000, 784)
test_x = test_x.reshape(10000, 784)
train_y = np_utils.to_categorical(train_y, 10)
test_y = np_utils.to_categorical(test_y, 10)
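A quick shape check after preprocessing (a minimal sketch):

print(train_x.shape, train_y.shape)  # (60000, 784) (60000, 10)
print(test_x.shape, test_y.shape)    # (10000, 784) (10000, 10)
print(train_x.min(), train_x.max())  # 0.0 1.0 after dividing by 255
print(train_y[0])  # a one-hot row; the first MNIST training label is 5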
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
sess = tf.InteractiveSession()

x = tf.placeholder(tf.float32, [None, 784])
nums = 1024  # number of hidden units
W_hidden = tf.Variable(tf.truncated_normal([784, nums]))
b_hidden = tf.Variable(tf.zeros([nums]))
hidden = tf.nn.relu(tf.matmul(x, W_hidden) + b_hidden)
W = tf.Variable(tf.zeros([nums, 10]))
b = tf.Variable(tf.zeros([10]))  # a variable for the output bias, initialized to 0
y = tf.nn.softmax(tf.matmul(hidden, W) + b)
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
tf.global_variables_initializer().run()
for i in range(2000):
    index = getRandomIndex(60000, 100)
    batch_xs, batch_ys = train_x[index], train_y[index]  # mini-batch training, batch size 100
    train_step.run({x: batch_xs, y_: batch_ys})

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))  # tf.argmax() returns the index of the maximum along a dimension, i.e. the predicted and true classes here
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # the mean of correct predictions gives the test accuracy
print(accuracy.eval({x: test_x, y_: test_y}))  # print the test accuracy
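As an aside, -tf.reduce_sum(y_ * tf.log(y)) returns NaN as soon as any softmax output hits exactly 0. A numerically safer variant (a sketch I have not run on this script) lets TF fuse the softmax with the cross-entropy, operating on the raw logits:

logits = tf.matmul(hidden, W) + b  # raw scores, before softmax
cross_entropy_safe = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=logits))
# reduce_mean instead of reduce_sum rescales the loss by the batch size;
# Adam's adaptive step sizes largely absorb that difference.
train_step_safe = tf.train.AdamOptimizer().minimize(cross_entropy_safe)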
tf.matmul(x, W_hidden) + b_hidden
- The shapes above are (None, 1024) + (1024,).
- Neat: that just means all 1024 bias values get added to every row (see the NumPy sketch after this list).
- If I set all the initial weights to 0 and change the hidden activation to sigmoid, training still comes out terrible! With an all-zero initialization every hidden unit computes the same output and receives the same gradient, so the symmetry is never broken and the layer effectively collapses to a single unit.
- Alas!
- I ran the code above twice from scratch.
- Compare the two W.eval() results: they differ, which means this network has multiple local optima (different runs settle into different solutions).
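That row-wise intuition is exactly NumPy-style broadcasting, which TensorFlow follows. A minimal NumPy sketch, with small made-up shapes standing in for (batch, 1024) + (1024,):

import numpy as np

A = np.zeros((3, 4))              # stand-in for the (batch, nums) matmul result
bias = np.arange(4, dtype=float)  # stand-in for the (nums,) bias vector
print(A + bias)
# [[0. 1. 2. 3.]
#  [0. 1. 2. 3.]
#  [0. 1. 2. 3.]]  -- the bias vector is added to every row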
W.eval()
Out[2]:
array([[ 0.02622618, 0.0915871 , 0.05437365, ..., 0.03095513,
0.02360716, -0.09277936],
[ 0.00729493, -0.0614912 , 0.00583134, ..., -0.04763368,
0.01732651, -0.02369016],
[-0.03202496, -0.04194519, -0.0152871 , ..., 0.00495494,
-0.06705572, 0.0539442 ],
...,
[ 0.0230215 , 0.01732747, -0.0229557 , ..., -0.05822266,
-0.02907027, -0.04704115],
[ 0.00426122, -0.02852006, 0.01155535, ..., 0.01701677,
0.00288148, 0.01234128],
[-0.01905017, -0.0064229 , -0.03893813, ..., -0.0019826 ,
0.03670788, 0.00873182]], dtype=float32)
runfile('C:/Users/ZTZ/Desktop/untitled0.py', wdir='C:/Users/ZTZ/Desktop')
D:\software\Anaconda3\envs\base_tensorflow\lib\site-packages\tensorflow\python\client\session.py:1751: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call `InteractiveSession.close()` to release resources held by the other session(s).
warnings.warn('An interactive session is already active. This can '
0.9701
W.eval()
Out[4]:
array([[-0.01211673, 0.02576707, 0.04460295, ..., 0.00835698,
-0.00926638, -0.05525181],
[-0.02516948, 0.00751439, 0.02384738, ..., -0.01638818,
0.00737585, 0.00152852],
[ 0.03023481, -0.0515671 , -0.05224042, ..., -0.08079564,
0.05317097, -0.03058143],
...,
[ 0.05854166, -0.01895202, -0.05466215, ..., -0.00664349,
-0.05537104, 0.02658511],
[-0.00801813, -0.02416388, -0.01407447, ..., 0.02684276,
-0.04101599, 0.0879845 ],
[-0.01473197, 0.02697607, 0.04069086, ..., -0.00403915,
-0.03310548, -0.10398304]], dtype=float32)
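To go beyond eyeballing the printouts, one could quantify how far apart the two solutions are (a sketch; W_run1 and W_run2 are hypothetical stand-ins for weights saved from each run, e.g. with np.save("w1.npy", W.eval())):

import numpy as np

rng = np.random.default_rng(0)
W_run1 = rng.normal(scale=0.05, size=(1024, 10))  # hypothetical saved weights, run 1
W_run2 = rng.normal(scale=0.05, size=(1024, 10))  # hypothetical saved weights, run 2

# Relative distance between the two solutions; a value near or above 1
# means the two runs ended in very different weight configurations.
print(np.linalg.norm(W_run1 - W_run2) / np.linalg.norm(W_run1))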
The loss function is a multi-class cross-entropy I invented myself (one binary cross-entropy term per class)
import numpy as np

def getRandomIndex(n, x):
    # Index range is [0, n); draw x distinct indices.
    # Note: replace=False samples without repetition; replace=True may repeat.
    index = np.random.choice(np.arange(n), size=x, replace=False)
    return index
from keras.datasets import mnist
from keras.utils import np_utils
import numpy as np
import matplotlib.pyplot as plt

(train_x, train_y), (test_x, test_y) = mnist.load_data()

# Show 16 randomly chosen training images with their labels.
for i in range(16):
    plt.subplot(4, 4, i + 1)
    a = np.random.randint(0, 60000)
    plt.imshow(train_x[a], cmap='gray', interpolation=None)
    plt.title("Class {}".format(train_y[a]))
plt.show()
train_x = train_x / 255.0
test_x = test_x / 255.0
train_x = train_x.reshape(60000, 784)
test_x = test_x.reshape(10000, 784)
train_y = np_utils.to_categorical(train_y, 10)
test_y = np_utils.to_categorical(test_y, 10)
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
sess = tf.InteractiveSession()

x = tf.placeholder(tf.float32, [None, 784])
nums = 1024  # number of hidden units
W_hidden = tf.Variable(tf.truncated_normal([784, nums]))
b_hidden = tf.Variable(tf.zeros([nums]))
hidden = tf.nn.relu(tf.matmul(x, W_hidden) + b_hidden)
W = tf.Variable(tf.zeros([nums, 10]))
b = tf.Variable(tf.zeros([10]))  # a variable for the output bias, initialized to 0
y = tf.nn.softmax(tf.matmul(hidden, W) + b)
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = -tf.reduce_sum(y_ * tf.log(y) + (1 - y_) * tf.log(1 - y))  # binary cross-entropy summed over all 10 classes
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
tf.global_variables_initializer().run()
for i in range(2000):
    index = getRandomIndex(60000, 100)
    batch_xs, batch_ys = train_x[index], train_y[index]  # mini-batch training, batch size 100
    train_step.run({x: batch_xs, y_: batch_ys})

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))  # tf.argmax() returns the index of the maximum along a dimension, i.e. the predicted and true classes here
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # the mean of correct predictions gives the test accuracy
print(accuracy.eval({x: test_x, y_: test_y}))  # print the test accuracy
- Only 0.098?! That is essentially chance level (random guessing over 10 classes gives about 0.1).
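A likely culprit is numerics rather than the idea itself: the softmax quickly drives some outputs to exactly 1, so tf.log(1 - y) returns -inf, the loss and gradients become NaN, and Adam stops learning, which leaves the accuracy at chance level. A hedged fix (a sketch, not verified on this exact script) is to clip the probabilities before taking logs:

eps = 1e-10
y_clipped = tf.clip_by_value(y, eps, 1.0 - eps)
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_clipped)
                               + (1 - y_) * tf.log(1 - y_clipped))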