# The code and the corresponding reasoning are as follows:
#-*- coding:utf-8 _*-
"""
@author:bluesli
@file: use_neural_network.py
@time: 2018/06/24
"""
'''
数据层n*784(灰度图只有一个通道,所以是784)
input_data(n*784)--->(w1,b1)(L1有256神经元)--->(w2,b2)(L2层有128个神经元)--->>out(多少个分类任务,输出就有几个)
w1=784*256 w2 = 256*128 b1 =256 b2 128(没有理解)
n_hidden_1 = 256
n_hidden_2 = 128
n_Input = 784
n_classes =10
定义x,y
初始化权重和偏重
w(w1,w2,out)
b(b1,b2,out)
定义前向传播函数
算完一层函数后需要有激活函数(sigmoid,n模块中)
layer1,layer2
return 返回结果,不需要加激活函数,layer2,weight[out],biases[out] 得分值
定义反向传播函数;
cost 函数(交叉熵函数,softmax_cross_entropy_with_logits(pred,y)) pred: 前向传播值, y 是 label 值
optimizer(优化器)梯度下降的优化器
corr(精度值)
转化精度为float类型,然后相加之后算出平均值;
'''
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
# Load the MNIST dataset (downloads to 'data/' on first run).
# one_hot=True: labels come back as 10-element one-hot vectors, matching n_classes.
mnist = input_data.read_data_sets('data/',one_hot=True)
train_img = mnist.train.images  # each row is one flattened 28x28 grayscale image (784 pixel intensities)
train_label = mnist.train.labels
test_img = mnist.test.images
test_label = mnist.test.labels
print(train_label[0])
print(len(train_img[0]))
# Network hyperparameters.
# n_hidden_x is the number of neurons in each hidden layer.
n_hidden_1 =256
n_hidden_2 = 128
n_input = 784
n_classes = 10
# Define the input placeholders x (images) and y (one-hot labels);
# the leading None dimension is the (variable) batch size.
x = tf.placeholder(tf.float32,[None,n_input])
y = tf.placeholder(tf.float32,[None,n_classes])
# Define the weight and bias variables for each layer.
# Small-stddev Gaussian initialization keeps the sigmoids out of saturation.
stddev = 0.1
weights = {
'w1':tf.Variable(tf.random_normal([n_input,n_hidden_1],stddev=stddev)),
'w2':tf.Variable(tf.random_normal([n_hidden_1,n_hidden_2],stddev=stddev)),
'out':tf.Variable(tf.random_normal([n_hidden_2,n_classes],stddev=stddev)),
}
biases = {
'b1':tf.Variable(tf.random_normal([n_hidden_1])),
'b2':tf.Variable(tf.random_normal([n_hidden_2])),
'out':tf.Variable(tf.random_normal([n_classes]))
}
print('neural network ready')
#定义前向传播函数:
# def forward_propagation(_x,_weights,_biases):
# layer1 = tf.nn.sigmoid(tf.add(tf.matmul(_x,_weights['w1'],_biases['b1'])))
# layer2 = tf.nn.sigmoid(tf.add(tf.matmul(layer1,_weights['w2']),_biases['b2']))
# return tf.add(tf.matmul(layer2,_weights['out']),_biases['out'])
def multilayer_perceptron(_X, _weights, _biases):
    """Forward pass of a 784 -> 256 -> 128 -> 10 MLP.

    Applies two sigmoid-activated hidden layers, then returns the raw
    output-layer scores (logits) — no activation on the final layer,
    since the loss applies softmax itself.
    """
    hidden_one = tf.nn.sigmoid(tf.matmul(_X, _weights['w1']) + _biases['b1'])
    hidden_two = tf.nn.sigmoid(tf.matmul(hidden_one, _weights['w2']) + _biases['b2'])
    logits = tf.matmul(hidden_two, _weights['out']) + _biases['out']
    return logits
# Prediction: forward-pass logits for the current batch.
pred = multilayer_perceptron(x,weights,biases)
# Backward pass: loss, optimizer, and evaluation ops.
# Cross-entropy loss — softmax is applied to the logits inside the op,
# so `pred` must be raw (un-softmaxed) scores.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits= pred,labels= y))
optimizer = tf.train.GradientDescentOptimizer(0.3)
train = optimizer.minimize(cost)
# Accuracy: a sample counts as correct when the highest-scoring class
# matches the one-hot label. `tf.argmax` replaces the deprecated
# `tf.arg_max` alias (removed in TensorFlow 2.x).
corr = tf.equal(tf.argmax(pred,1),tf.argmax(y,1))
accr = tf.reduce_mean(tf.cast(corr,'float'))
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
    train_epochs = 6
    batch_size = 1000
    train_step = 2  # report accuracy every `train_step` epochs
    for epoch in range(train_epochs):
        # Mini-batches per epoch (+1 so the final partial batch is included).
        num_batch = int(mnist.train.num_examples/batch_size)+1
        # BUG FIX: reset the running cost at the start of every epoch.
        # Previously it was initialized once before the epoch loop, so each
        # epoch's "average" accumulated on top of the prior epoch's value.
        avg_cost = 0
        for i in range(num_batch):
            # BUG FIX: draw a fresh mini-batch for every training step.
            # The original called next_batch once per epoch, training
            # num_batch times on the same single batch of data.
            batch_example, batch_label = mnist.train.next_batch(batch_size)
            feed_seed = {x:batch_example,y:batch_label}
            sess.run(train,feed_dict=feed_seed)
            avg_cost += sess.run(cost,feed_dict=feed_seed)
        avg_cost = avg_cost/num_batch
        if (epoch+1) % train_step ==0:
            # Evaluate on the last training batch and on the full test set.
            feed_train = {x:batch_example,y:batch_label}
            feed_test = {x:test_img,y:test_label}
            print('train accr:%f'%sess.run(accr,feed_dict=feed_train))
            print('test accr:%f'%sess.run(accr,feed_dict=feed_test))
            print('avg_cost:%f'%avg_cost)