import tensorflow as tf
import numpy as np
from sklearn.utils import shuffle
from sklearn.preprocessing import OneHotEncoder
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Load the MNIST dataset (downloaded/cached by Keras on first run).
mnist = tf.keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
print(X_train.shape)  # expected (60000, 28, 28) raw uint8 images — TODO confirm
print(Y_train.shape)  # expected (60000,) integer class labels
# Training hyper-parameters
learning_rate = 0.001
train_epoches = 50    # passes over the training set
batch_size = 100
display_step = 1      # print progress every `display_step` epochs
# Network layout: 784 inputs, two 256-unit hidden layers, 10 output classes
layers_dimension = [784, 256, 256, 10]
layer_num = len(layers_dimension)
def onehot(y, start, end, categories='auto'):
    """One-hot encode integer labels.

    Parameters
    ----------
    y : array-like of ints, shape (n, 1) or (n,)
        Labels to encode; every value must lie in [start, end).
    start, end : int
        Half-open range of label values; output has ``end - start`` columns.
    categories : unused
        Kept only for backward compatibility with the previous
        sklearn-based signature (it was never forwarded anyway).

    Returns
    -------
    numpy.ndarray of float64, shape (n, end - start)
    """
    labels = np.asarray(y, dtype=np.int64).reshape(-1)
    # Row k of the identity matrix is the one-hot vector for label k; this
    # replaces the heavier sklearn OneHotEncoder fit/transform round-trip
    # with the same float64 dense output.
    return np.eye(end - start, dtype=np.float64)[labels - start]
def mnistLable2ontHot(X_train, Y_train, X_test, Y_test, shuff=True):
    """One-hot encode the MNIST labels and optionally shuffle both splits.

    Relies on the module-level ``layers_dimension`` / ``layer_num`` to get
    the number of classes (the width of the final layer).

    Parameters
    ----------
    X_train, X_test : image arrays, returned unchanged except for shuffling.
    Y_train, Y_test : integer label arrays, replaced by one-hot matrices.
    shuff : bool
        When true, each split is shuffled (images and labels in lockstep).

    Returns
    -------
    (X_train, Y_train, X_test, Y_test) with one-hot label matrices.
    """
    num_classes = layers_dimension[layer_num - 1]
    Y_train = onehot(np.array(np.reshape(Y_train, [-1, 1]), dtype=np.int32), 0, num_classes)
    Y_test = onehot(np.array(np.reshape(Y_test, [-1, 1]), dtype=np.int32), 0, num_classes)
    if shuff:  # idiomatic truth test instead of `== True`
        X_train, Y_train = shuffle(X_train, Y_train)
        X_test, Y_test = shuffle(X_test, Y_test)
    return X_train, Y_train, X_test, Y_test
X_train, Y_train, X_test, Y_test = mnistLable2ontHot(X_train, Y_train, X_test, Y_test, shuff=True)
# Graph input placeholders.
# NOTE(review): tf.placeholder is a TF 1.x graph-mode API and does not exist
# in TF 2.x — this script presumably targets TF 1.x (or tf.compat.v1); confirm.
x = tf.placeholder('float', [None, layers_dimension[0]])  # flattened 28*28 images
y = tf.placeholder('float', [None, layers_dimension[layer_num-1]])  # one-hot labels
# 构建网络模型
def multilayer_perception(x, weights, biases):
    """Forward pass of a 2-hidden-layer MLP.

    Applies two ReLU-activated dense layers ('w1'/'b1', 'w2'/'b2') followed
    by a linear output layer ('out'), returning raw logits (no softmax).
    """
    hidden1 = tf.nn.relu(tf.add(tf.matmul(x, weights['w1']), biases['b1']))
    hidden2 = tf.nn.relu(tf.add(tf.matmul(hidden1, weights['w2']), biases['b2']))
    return tf.matmul(hidden2, weights['out']) + biases['out']
#
def get_weights_biases(layers_dimension):
    """Build weight/bias Variables for every layer of the MLP.

    Hidden layers get keys 'w1'/'b1', 'w2'/'b2', ...; the final layer gets
    the keys 'out'. All parameters are initialised from a standard normal.

    Returns
    -------
    (weights, biases) : two dicts of tf.Variable.
    """
    n_layers = len(layers_dimension)
    weights, biases = {}, {}
    for idx in range(1, n_layers - 1):
        fan_in, fan_out = layers_dimension[idx - 1], layers_dimension[idx]
        weights['w' + str(idx)] = tf.Variable(tf.random_normal([fan_in, fan_out]))
        biases['b' + str(idx)] = tf.Variable(tf.random_normal([fan_out]))
    last_hidden = layers_dimension[n_layers - 2]
    n_classes = layers_dimension[n_layers - 1]
    weights['out'] = tf.Variable(tf.random_normal([last_hidden, n_classes]))
    biases['out'] = tf.Variable(tf.random_normal([n_classes]))
    return weights, biases
# weights = {
# 'h1':tf.Variable(tf.random_normal([layers_dimension[0], layers_dimension[1]])),
# 'h2':tf.Variable(tf.random_normal([layers_dimension[1], layers_dimension[2]])),
# 'out':tf.Variable(tf.random_normal([layers_dimension[2], layers_dimension[layer_num-1]]))
# }
weights, biases = get_weights_biases(layers_dimension)
pred = multilayer_perception(x, weights, biases)  # raw logits (no softmax)
# Loss function and optimizer: softmax_cross_entropy_with_logits applies the
# softmax internally, which is why `pred` is left as raw logits above.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Op that initializes all Variables (TF 1.x graph-mode API)
init = tf.global_variables_initializer()
# Launch the graph in a session and run the training loop.
with tf.Session() as sess:
    sess.run(init)
    # Build the evaluation ops ONCE. The original created them inside the
    # inner batch loop, adding new nodes to the graph on every iteration,
    # which bloats memory and slows every subsequent step.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
    for epoch in range(train_epoches):
        avg_cost = 0.0
        total_batch = int(X_train.shape[0] / batch_size)
        # One full pass over the training set.
        for i in range(total_batch):
            batch_x = X_train[i*batch_size:(i+1)*batch_size, :]
            # Flatten 28x28 images to the 784-dim vectors the placeholder expects.
            batch_x = np.reshape(batch_x, [-1, 28*28])
            batch_y = Y_train[i*batch_size:(i+1)*batch_size, :]
            _, c, acc = sess.run([optimizer, cost, accuracy_op],
                                 feed_dict={x: batch_x, y: batch_y})
            # Accumulate the mean of per-batch losses; the original assigned
            # `avg_cost = c/total_batch`, printing only the last batch's
            # loss divided by the batch count instead of the epoch average.
            avg_cost += c / total_batch
        if epoch % display_step == 0:
            print("epoch:%04d" % (epoch+1), "cost={:.9f}".format(avg_cost), "Accuracy:", acc)