例如有n个样本x,每个样本有784个像素,即(n,784),隐藏层有256个神经元,在全连接中,每个样本x都要与各个神经元相乘
w的定义:
而神经元的参数w大小需要对应样本x的大小,如当一个样本即(1,784)进来的时候,需要与256个神经元即(784,256)做运算,得到(1,256)即256个结果,再与b相加,所以b大小为256。
n个样本,则
第一层隐藏层:(输入为(n,784))
- 参数w1:(784,256)
- 参数b1:(256,),与(n,256)的结果相加时自动广播到每个样本
第二层隐藏层:(输入为(n,256)) - 参数w2:(256,128)
- 参数b2:(128,),与(n,128)的结果相加时自动广播到每个样本
输出层:(输入为(n,128)) - 参数out:(128,10)
得到的输出为(n,10)的矩阵,经过softmax后即为概率矩阵,每一行对应每个样本分类的概率大小
# coding=utf-8
import tensorflow as tf
import input_data
import numpy as np
import matplotlib.pyplot as plt
# Load the MNIST data set from the local 'data/' directory with one-hot labels.
mnist = input_data.read_data_sets('data/', one_hot=True)
print("Mnist ready")

# Layer widths: input -> hidden 1 -> hidden 2 -> output classes.
n_input = 784
n_hidden1 = 256
n_hidden2 = 128
n_classes = 10

# Graph inputs; the leading dimension is None so any batch size can be fed.
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])

# Standard deviation used when drawing the initial weight matrices.
stddev = 0.1

# One weight matrix per layer, shaped (fan_in, fan_out).
weights = dict(
    w1=tf.Variable(tf.random_normal([n_input, n_hidden1], stddev=stddev)),
    w2=tf.Variable(tf.random_normal([n_hidden1, n_hidden2], stddev=stddev)),
    out=tf.Variable(tf.random_normal([n_hidden2, n_classes], stddev=stddev)),
)

# One bias vector per layer, one entry per unit. No stddev is passed here,
# so tf.random_normal's own default applies.
biases = dict(
    b1=tf.Variable(tf.random_normal([n_hidden1])),
    b2=tf.Variable(tf.random_normal([n_hidden2])),
    out=tf.Variable(tf.random_normal([n_classes])),
)
print("Network ready")
def multilayer_perceptorn(_X, _weights, _biases):
    """Build the forward pass: two sigmoid hidden layers and a linear output.

    Args:
        _X: input tensor fed to the first layer.
        _weights: mapping with keys 'w1', 'w2' and 'out' (weight matrices).
        _biases: mapping with keys 'b1', 'b2' and 'out' (bias vectors).

    Returns:
        The output layer's pre-activation values; no softmax is applied here.
    """
    activation = _X
    # Both hidden layers have the same form: sigmoid(activation @ W + b).
    for w_key, b_key in (('w1', 'b1'), ('w2', 'b2')):
        pre_activation = tf.matmul(activation, _weights[w_key]) + _biases[b_key]
        activation = tf.nn.sigmoid(pre_activation)
    return tf.add(tf.matmul(activation, _weights['out']), _biases['out'])
def plot(x1, y1, y2):
    """Draw y1 and y2 against the shared x-axis x1 and show the figure.

    Args:
        x1: x coordinates shared by both curves.
        y1: values of the first curve (drawn first).
        y2: values of the second curve (drawn second).
    """
    for series in (y1, y2):
        plt.plot(x1, series)
    plt.xlabel("step")
    plt.ylabel("acc")
    plt.show()
# Forward pass: the network's unnormalized class scores for each input row.
pred = multilayer_perceptorn(x, weights, biases)
# Loss: softmax cross-entropy between the scores and the labels, averaged
# over the batch.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
# Training op: one gradient-descent step on the loss.
optm = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(cost)
# Accuracy: fraction of rows whose highest-scoring class equals the label's.
# tf.argmax is used instead of the deprecated tf.arg_max alias.
corr = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accr = tf.reduce_mean(tf.cast(corr, "float"))
init = tf.global_variables_initializer()
print("Function ready")
# Training configuration.
train_epochs = 1000
batch_size = 100
step = 2  # report cost and accuracy every `step` epochs

sess = tf.Session()
sess.run(init)

# Accuracy history, one entry per report, used for the final plot.
plot_y1 = []  # accuracy on the last training batch of the epoch
plot_y2 = []  # accuracy on the test set
x1 = []       # 1-based index of the report

# Number of whole mini-batches per epoch; it never changes, so compute it once.
batch_num = mnist.train.num_examples // batch_size

for epoch in range(train_epochs):
    avg_cost = 0.0
    for i in range(batch_num):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feeds = {x: batch_xs, y: batch_ys}
        # Fetch the loss in the same run as the update instead of paying for
        # a second forward pass just to read it.
        _, batch_cost = sess.run([optm, cost], feed_dict=feeds)
        avg_cost += batch_cost
    avg_cost = avg_cost / batch_num
    # display
    if (epoch + 1) % step == 0:
        print("epoch:%03d/%03d cost:%.9f" % (epoch, train_epochs, avg_cost))
        # Training accuracy is measured on the last mini-batch of this epoch.
        feeds = {x: batch_xs, y: batch_ys}
        train_acc = sess.run(accr, feed_dict=feeds)
        print("train accuracy:%.3f" % (train_acc))
        # Test accuracy must come from the held-out test split; evaluating
        # on the training split here would just report training accuracy.
        feeds = {x: mnist.test.images, y: mnist.test.labels}
        test_acc = sess.run(accr, feed_dict=feeds)
        print("test accuracy:%.3f" % (test_acc))
        plot_y1.append(train_acc)
        plot_y2.append(test_acc)
        x1.append(len(plot_y1))

# Plot once, after training: plt.show() inside the loop would block training.
plot(x1, plot_y1, plot_y2)
print("Optimization finshed")
训练结果,测试集为橙色: