Understanding a Program That Trains a Convolutional Neural Network on the MNIST Dataset

from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf
TRAINING_STEPS = 6000   # the original CSDN post used 20000 steps; 6000 proved enough in testing -- more steps tend to overfit (test-set accuracy suffers?)
BATCH_SIZE = 50         # number of samples in each training batch

def trainingMnistModule():
    mnist = input_data.read_data_sets('./MNIST_data', one_hot=True)  # path to the MNIST dataset

    x = tf.placeholder(tf.float32, [None, 784])   # input: flattened 28x28 grayscale images

    y_ = tf.placeholder(tf.float32, [None, 10])   # labels: one-hot encoded digits 0-9


    def weight_variable(shape):
        # small truncated-normal initialization to break symmetry
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        # small positive bias keeps the ReLUs active initially
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        # stride 1 in every dimension; SAME padding preserves the spatial size
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        # 2x2 pooling with stride 2 halves each spatial dimension
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])

    x_image = tf.reshape(x, [-1, 28, 28, 1])  # -1 means any batch size; each row becomes a 28x28 single-channel image

    h_conv1 = tf.nn.relu(conv2d(x_image,W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])

    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    W_fc1 = weight_variable([7 * 7 * 64, 1024])
    b_fc1 = bias_variable([1024])

    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    keep_prob = tf.placeholder(tf.float32)  # dropout keep probability (1.0 disables dropout)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])

    y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    cross_entropy = -tf.reduce_sum(y_*tf.log(y_conv))
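    # Note: applying softmax and then log, as above, is numerically unstable
    # when y_conv contains values near zero. A commonly used TF 1.x alternative
    # (an aside, not part of the original program) computes the loss from the
    # raw logits instead:
    #   logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    #   cross_entropy = tf.reduce_mean(
    #       tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))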
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    saver = tf.train.Saver()  # define the saver for checkpointing the trained variables
    # saver = tf.train.Saver([W_conv1, b_conv1, W_conv2, b_conv2, W_fc2, b_fc2])  # alternative: save only selected variables
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(TRAINING_STEPS):
            batch = mnist.train.next_batch(BATCH_SIZE)
            if i % 100 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0], y_: batch[1], keep_prob: 1.0})
                print('step %d, training accuracy %g' % (i, train_accuracy))
            train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
        saver.save(sess, 'C:\\Users\\Administrator\\Desktop\\paintBoard\\SAVE\\model.ckpt')  # model save location; \\ is an escaped backslash

        print('test accuracy %g' % accuracy.eval(feed_dict={
            x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
    return train_accuracy

# if __name__ == '__main__':
#     trainingMnistModule()
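
To use the checkpoint later (for example, from the paintBoard application), the same graph must be rebuilt and the variables restored. A minimal sketch, assuming the graph-construction code above has been hoisted out of trainingMnistModule() so that saver, x, keep_prob, and y_conv are in scope; loadAndPredict is a hypothetical helper, not part of the original program:

def loadAndPredict(images):
    # 'images': a [N, 784] float array of flattened 28x28 digits
    with tf.Session() as sess:
        saver.restore(sess, 'C:\\Users\\Administrator\\Desktop\\paintBoard\\SAVE\\model.ckpt')
        # keep_prob = 1.0 disables dropout at inference time
        probs = sess.run(y_conv, feed_dict={x: images, keep_prob: 1.0})
    return probs.argmax(axis=1)  # most probable digit for each image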


Below is my current understanding of the program; I will continue to fill it in and refine it as time allows.

       The network has one input layer, three hidden layers, and one output layer. The three hidden layers consist of two convolution-pooling blocks and one fully connected layer.
    A 28x28 image, after passing through the two convolution-pooling blocks, becomes a 7x7x64 tensor (which I think of as 64 small 7x7 images). This is fed into a fully connected layer of 1024 neurons, whose weight matrix W1 has shape [7x7x64, 1024]. The 1024 neurons then feed a fully connected output layer of 10 neurons, whose weight matrix W2 has shape [1024, 10]. 1. In the first fully connected layer, the 7x7x64 tensor is flattened into a vector of length 7x7x64 = 3136 and multiplied by W1, yielding a vector of length 1024, which is the output of the 1024 neurons. 2. Multiplying this vector by W2 yields a vector of length 10 whose entries, after softmax, are the probabilities of the ten digits.
    So in the fully connected layers, the first weight matrix W1 is [7x7x64, 1024] and the second, W2, is [1024, 10], as the sketch below checks.
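
The matrix shapes in the two fully connected layers can be verified with a minimal NumPy sketch (random values, shapes only; W1 and W2 mirror W_fc1 and W_fc2 from the code above):

import numpy as np

h_pool2 = np.random.rand(1, 7, 7, 64)         # one sample after the second conv-pool block
flat = h_pool2.reshape(-1, 7 * 7 * 64)        # (1, 3136)
W1 = np.random.rand(7 * 7 * 64, 1024)         # first fully connected weight matrix
h_fc1 = flat @ W1                             # (1, 1024)
W2 = np.random.rand(1024, 10)                 # second fully connected weight matrix
logits = h_fc1 @ W2                           # (1, 10): one score per digit
print(flat.shape, h_fc1.shape, logits.shape)  # (1, 3136) (1, 1024) (1, 10)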
    Next, the convolution-pooling blocks. 1. Each input record (a vector of length 28x28 = 784) is reshaped into a 28x28 matrix and fed into a convolutional layer whose kernel is a 5x5x1x32 weight tensor; with stride 1 and SAME padding, the convolution outputs a 28x28x32 tensor. 2. This goes into a pooling layer with a 2x2 pooling kernel, which outputs a 14x14x32 tensor. 3. The output of the first convolution-pooling block is fed into the second, whose kernel is a 5x5x32x64 weight tensor; the convolution outputs a 14x14x64 tensor, and after a pooling layer identical to the first one, the output is a 7x7x64 tensor. That tensor is then flattened into a vector and passed to the fully connected layers.
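
With SAME padding, TensorFlow computes each spatial output dimension as ceil(input / stride), which reproduces all of the sizes above. A small sketch of the spatial dimensions only (channels, biases, and activations omitted):

import math

def same_out(size, stride):
    # SAME padding: output = ceil(input / stride), independent of kernel size
    return math.ceil(size / stride)

size = 28
size = same_out(size, 1)  # conv1, stride 1 -> 28
size = same_out(size, 2)  # pool1, stride 2 -> 14
size = same_out(size, 1)  # conv2, stride 1 -> 14
size = same_out(size, 2)  # pool2, stride 2 -> 7
print(size)               # 7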

