The loss function is the standard multi-class cross-entropy
import numpy as np

def getRandomIndex(n, x):
    # Index range is [0, n); draw x distinct indices.
    # Note: replace=False samples without repetition; replace=True may repeat.
    index = np.random.choice(np.arange(n), size=x, replace=False)
    return index
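A quick demonstration of the replace flag (a minimal sketch; the printed values vary because no seed is set):

idx = getRandomIndex(10, 5)
print(idx)                        # e.g. [7 2 9 0 4] -- five distinct indices
print(len(set(idx)) == len(idx))  # True: replace=False guarantees no repeats
# With replace=True the same index can be drawn more than once:
print(np.random.choice(np.arange(10), size=5, replace=True))  # e.g. [3 3 8 1 3]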
from keras.datasets import mnist
from keras.utils import np_utils
import numpy as np
import matplotlib.pyplot as plt

(train_x, train_y), (test_x, test_y) = mnist.load_data()

# Show 16 randomly chosen training images with their labels.
for i in range(16):
    plt.subplot(4, 4, i + 1)
    a = np.random.randint(0, 60000)
    plt.imshow(train_x[a], cmap='gray', interpolation=None)
    plt.title("Class {}".format(train_y[a]))
plt.show()
train_x = train_x / 255.0
test_x = test_x / 255.0
train_x = train_x.reshape(60000, 784)
test_x = test_x.reshape(10000, 784)
train_y = np_utils.to_categorical(train_y, 10)
test_y = np_utils.to_categorical(test_y, 10)
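A quick shape check after preprocessing (a minimal sketch):

print(train_x.shape, train_y.shape)  # (60000, 784) (60000, 10)
print(test_x.shape, test_y.shape)    # (10000, 784) (10000, 10)
print(train_x.min(), train_x.max())  # 0.0 1.0 after dividing by 255
print(train_y[0])  # a one-hot row; the first MNIST training label is 5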
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
sess = tf.InteractiveSession()

x = tf.placeholder(tf.float32, [None, 784])
nums = 1024  # number of hidden units
W_hidden = tf.Variable(tf.truncated_normal([784, nums]))
b_hidden = tf.Variable(tf.zeros([nums]))
hidden = tf.nn.relu(tf.matmul(x, W_hidden) + b_hidden)
W = tf.Variable(tf.zeros([nums, 10]))
b = tf.Variable(tf.zeros([10]))  # a variable for the output bias, initialized to 0
y = tf.nn.softmax(tf.matmul(hidden, W) + b)
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
tf.global_variables_initializer().run()
for i in range(2000):
    index = getRandomIndex(60000, 100)
    batch_xs, batch_ys = train_x[index], train_y[index]  # mini-batch training, batch size 100
    train_step.run({x: batch_xs, y_: batch_ys})

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))  # tf.argmax() returns the index of the maximum along a dimension, i.e. the predicted and true classes here
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # the mean of correct predictions gives the test accuracy
print(accuracy.eval({x: test_x, y_: test_y}))  # print the test accuracy
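As an aside, -tf.reduce_sum(y_ * tf.log(y)) returns NaN as soon as any softmax output hits exactly 0. A numerically safer variant (a sketch I have not run on this script) lets TF fuse the softmax with the cross-entropy, operating on the raw logits:

logits = tf.matmul(hidden, W) + b  # raw scores, before softmax
cross_entropy_safe = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=logits))
# reduce_mean instead of reduce_sum rescales the loss by the batch size;
# Adam's adaptive step sizes largely absorb that difference.
train_step_safe = tf.train.AdamOptimizer().minimize(cross_entropy_safe)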
tf.matmul(x, W_hidden) + b_hidden
- The shapes above are (None, 1024) + (1024,).
- Neat: that just means all 1024 bias values get added to every row (see the NumPy sketch after this list).
- If I set all the initial weights to 0 and change the hidden activation to sigmoid, training still comes out terrible! With an all-zero initialization every hidden unit computes the same output and receives the same gradient, so the symmetry is never broken and the layer effectively collapses to a single unit.
- Alas!
- I ran the code above twice from scratch.
- Compare the two W.eval() results: they differ, which means this network has multiple local optima (different runs settle into different solutions).
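That row-wise intuition is exactly NumPy-style broadcasting, which TensorFlow follows. A minimal NumPy sketch, with small made-up shapes standing in for (batch, 1024) + (1024,):

import numpy as np

A = np.zeros((3, 4))              # stand-in for the (batch, nums) matmul result
bias = np.arange(4, dtype=float)  # stand-in for the (nums,) bias vector
print(A + bias)
# [[0. 1. 2. 3.]
#  [0. 1. 2. 3.]
#  [0. 1. 2. 3.]]  -- the bias vector is added to every row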
W.eval()
Out[2]:
array([[ 0.02622618, 0.0915871 , 0.05437365, ..., 0.03095513,
0.02360716, -0.09277936],
[ 0.00729493, -0.0614912 , 0.00583134, ..., -0.04763368,
0.01732651, -0.02369016],
[-0.03202496, -0.04194519, -0.0152871 , ..., 0.00495494,
-0.06705572, 0.0539442 ],
...,
[ 0.0230215 , 0.01732747, -0.0229557 , ..., -0.05822266,
-0.02907027, -0.04704115],
[ 0.00426122, -0.02852006, 0.01155535, ..., 0.01701677,
0.00288148, 0.01234128],
[-0.01905017, -0.0064229 , -0.03893813, ..., -0.0019826 ,
0.03670788, 0.00873182]], dtype=float32)
runfile('C:/Users/ZTZ/Desktop/untitled0.py', wdir='C:/Users/ZTZ/Desktop')
D:\software\Anaconda3\envs\base_tensorflow\lib\site-packages\tensorflow\python\client\session.py:1751: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call `InteractiveSession.close()` to release resources held by the other session(s).
warnings.warn('An interactive session is already active. This can '
0.9701
W.eval()
Out[4]:
array([[-0.01211673, 0.02576707, 0.04460295, ..., 0.00835698,
-0.00926638, -0.05525181],
[-0.02516948, 0.00751439, 0.02384738, ..., -0.01638818,
0.00737585, 0.00152852],
[ 0.03023481, -0.0515671 , -0.05224042, ..., -0.08079564,
0.05317097, -0.03058143],
...,
[ 0.05854166, -0.01895202, -0.05466215, ..., -0.00664349,
-0.05537104, 0.02658511],
[-0.00801813, -0.02416388, -0.01407447, ..., 0.02684276,
-0.04101599, 0.0879845 ],
[-0.01473197, 0.02697607, 0.04069086, ..., -0.00403915,
-0.03310548, -0.10398304]], dtype=float32)
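To go beyond eyeballing the printouts, one could quantify how far apart the two solutions are (a sketch; W_run1 and W_run2 are hypothetical stand-ins for weights saved from each run, e.g. with np.save("w1.npy", W.eval())):

import numpy as np

rng = np.random.default_rng(0)
W_run1 = rng.normal(scale=0.05, size=(1024, 10))  # hypothetical saved weights, run 1
W_run2 = rng.normal(scale=0.05, size=(1024, 10))  # hypothetical saved weights, run 2

# Relative distance between the two solutions; a value near or above 1
# means the two runs ended in very different weight configurations.
print(np.linalg.norm(W_run1 - W_run2) / np.linalg.norm(W_run1))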
The loss function is a multi-class cross-entropy I invented myself (one binary cross-entropy term per class)
import numpy as np

def getRandomIndex(n, x):
    # Index range is [0, n); draw x distinct indices.
    # Note: replace=False samples without repetition; replace=True may repeat.
    index = np.random.choice(np.arange(n), size=x, replace=False)
    return index
from keras.datasets import mnist
from keras.utils import np_utils
import numpy as np
import matplotlib.pyplot as plt

(train_x, train_y), (test_x, test_y) = mnist.load_data()

# Show 16 randomly chosen training images with their labels.
for i in range(16):
    plt.subplot(4, 4, i + 1)
    a = np.random.randint(0, 60000)
    plt.imshow(train_x[a], cmap='gray', interpolation=None)
    plt.title("Class {}".format(train_y[a]))
plt.show()
train_x = train_x / 255.0
test_x = test_x / 255.0
train_x = train_x.reshape(60000, 784)
test_x = test_x.reshape(10000, 784)
train_y = np_utils.to_categorical(train_y, 10)
test_y = np_utils.to_categorical(test_y, 10)
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
sess = tf.InteractiveSession()

x = tf.placeholder(tf.float32, [None, 784])
nums = 1024  # number of hidden units
W_hidden = tf.Variable(tf.truncated_normal([784, nums]))
b_hidden = tf.Variable(tf.zeros([nums]))
hidden = tf.nn.relu(tf.matmul(x, W_hidden) + b_hidden)
W = tf.Variable(tf.zeros([nums, 10]))
b = tf.Variable(tf.zeros([10]))  # a variable for the output bias, initialized to 0
y = tf.nn.softmax(tf.matmul(hidden, W) + b)
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = -tf.reduce_sum(y_ * tf.log(y) + (1 - y_) * tf.log(1 - y))  # binary cross-entropy summed over all 10 classes
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
tf.global_variables_initializer().run()
for i in range(2000):
    index = getRandomIndex(60000, 100)
    batch_xs, batch_ys = train_x[index], train_y[index]  # mini-batch training, batch size 100
    train_step.run({x: batch_xs, y_: batch_ys})

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))  # tf.argmax() returns the index of the maximum along a dimension, i.e. the predicted and true classes here
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # the mean of correct predictions gives the test accuracy
print(accuracy.eval({x: test_x, y_: test_y}))  # print the test accuracy
- Only 0.098?! That is essentially chance level (random guessing over 10 classes gives about 0.1).
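A likely culprit is numerics rather than the idea itself: the softmax quickly drives some outputs to exactly 1, so tf.log(1 - y) returns -inf, the loss and gradients become NaN, and Adam stops learning, which leaves the accuracy at chance level. A hedged fix (a sketch, not verified on this exact script) is to clip the probabilities before taking logs:

eps = 1e-10
y_clipped = tf.clip_by_value(y, eps, 1.0 - eps)
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_clipped)
                               + (1 - y_) * tf.log(1 - y_clipped))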