LeNet-5 and MNIST: training the model parameters in TensorFlow, re-implementing the network in pure Python without a deep-learning framework, and verifying the forward pass with the trained parameters

1. Train the model in TensorFlow and obtain the weight and bias parameters

# The input layer is not counted, so LeNet-5 is a 5-layer network (each conv + pooling pair counts as one layer).
#  input -> conv-pool -> conv-pool -> fully connected -> fully connected -> fully connected output

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
#tf.reset_default_graph()  # uncomment when re-running in the same interpreter, otherwise: ValueError: Variable layer1-c1/w already exists, disallowed.


# Train on a specific GPU ====================================================
# import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '2'
# # os.environ['CUDA_VISIBLE_DEVICES'] = '2,3'  # choose which GPU(s) to use for training
#
# =============================================================================




batch_size = 100               # batch size
learning_rate_base = 0.01      # base learning rate (not used below; the Adam optimizer is created with 0.001)
training_iters = mnist.train.num_examples // batch_size  # iterations per epoch

input_node = 784   # 28*28
output_node = 10   # digits 0-9

image_size = 28    # image size in pixels
num_channels = 1   # number of channels
num_labels = 10    # number of labels / classes

# conv layer C1: depth and kernel size
conv1_deep = 32
conv1_size = 5  # changing this to 3 tends to improve accuracy slightly

# conv layer C2: depth and kernel size
conv2_deep = 64
conv2_size = 5

# fully connected layers
fc_size1 = 512
fc_size2 = 256


def LENET5(input_tensor):
    with tf.variable_scope('layer1'):  # tf.get_variable() must be used inside a tf.variable_scope
        conv1_w = tf.get_variable("w1", [conv1_size, conv1_size, num_channels, conv1_deep],
                                  initializer=tf.truncated_normal_initializer(stddev=0.1))
        # tf.get_variable fetches an existing variable or creates a new one
        # tf.truncated_normal_initializer draws from a truncated normal distribution: values more than
        # two standard deviations from the mean are discarded and redrawn; stddev is the standard deviation
        conv1_b = tf.get_variable("b1", [conv1_deep], initializer=tf.constant_initializer(0.0))
        # constant initializer: typically used for the bias terms
        conv1 = tf.nn.conv2d(input_tensor, conv1_w, strides=[1, 1, 1, 1], padding='SAME')  # the original LeNet expects 32x32 input; here 'SAME' zero padding is used instead
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_b))
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        """relu1.shape=[ None 28 28 32]        pool1.shape=[ None 14 14 32]"""

    with tf.variable_scope('layer2'):
        conv2_w = tf.get_variable("w2", [conv2_size, conv2_size, conv1_deep, conv2_deep],
                                  initializer=tf.truncated_normal_initializer(stddev=0.1))
        # fetch an existing variable or create a new one
        conv2_b = tf.get_variable("b2", [conv2_deep], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_w, strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_b))
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        """relu2.shape=[ None 14 14 64]       pool2.shape=[ None 7 7 64]"""

    with tf.variable_scope('layer3'):

        reshaped = tf.contrib.layers.flatten(pool2)
        nodes = reshaped.shape[1]
        """reshaped.shape=[ None 3136]       nodes=3136"""
        # convert the output of the second pooling layer into the input format of the first
        # fully connected layer: the 7x7x64 feature map has to be flattened into a vector
    # =============================================================================
    #        # alternative way to flatten
    #         pool_shape = pool2.get_shape().as_list()
    #         nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]  # length of the flattened feature map
    #         reshaped = tf.reshape(pool2, [pool_shape[0], nodes])
    # =============================================================================
        fc1_w = tf.get_variable("w3", [nodes, fc_size1], initializer=tf.truncated_normal_initializer(stddev=0.1))
        fc1_b = tf.get_variable("b3", [fc_size1], initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_w) + fc1_b)
        """fc1.shape=[ None  512]"""
    with tf.variable_scope('layer4'):
        fc2_w = tf.get_variable('w4', [fc_size1, fc_size2], initializer=tf.truncated_normal_initializer(stddev=0.1))
        fc2_b = tf.get_variable("b4", [fc_size2], initializer=tf.constant_initializer(0.1))
        fc2 = tf.matmul(fc1, fc2_w) + fc2_b
        """fc2.shape=[ None  256]"""

    with tf.variable_scope('layer5'):
        fc3_w = tf.get_variable('w5', [fc_size2, num_labels], initializer=tf.truncated_normal_initializer(stddev=0.1))
        fc3_b = tf.get_variable("b5", [num_labels], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc2, fc3_w) + fc3_b
        """logit.shape=[ None  10]"""
    return logit
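# =============================================================================
# For reference, a quick hand count of the trainable parameters implied by the
# shapes above:
#   conv1: 5*5*1*32  + 32  =       832
#   conv2: 5*5*32*64 + 64  =    51,264
#   fc1:   3136*512  + 512 = 1,606,144
#   fc2:   512*256   + 256 =   131,328
#   fc3:   256*10    + 10  =     2,570
#   total                  = 1,792,138
# =============================================================================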

x = tf.placeholder(tf.float32, [None, image_size, image_size, num_channels], name='x-input')
y = tf.placeholder(tf.float32, [None, output_node], name='y-input')
y_ = LENET5(x)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=y_, labels=y)
loss = tf.reduce_mean(cross_entropy)
train_step = tf.train.AdamOptimizer(0.001).minimize(loss)
correct_pred = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

saver = tf.train.Saver(max_to_keep=1)
model_path = "./CNN_Model/CNN_Model.ckpt"  # the ./CNN_Model directory must exist before saving

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    max_acc = 0
    train_acc = 0
    for i in range(training_iters * 3):
        xs, ys = mnist.train.next_batch(batch_size)
        xs = np.reshape(xs, [batch_size, image_size, image_size, num_channels])

        if i % 40 == 0:
            train_acc = accuracy.eval(feed_dict={x: xs, y: ys})
            print("step:%d ,train accuracy %g" % (i, train_acc))
        # =====================================================================
        # save the model: keep the checkpoint with the highest batch accuracy seen so far
        if train_acc > max_acc:
            max_acc = train_acc
            saver.save(sess, model_path)
        # =====================================================================
        train_step.run(feed_dict={x: xs, y: ys})


print("===========Starting 2nd session=============")

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, model_path)
    xt=np.reshape(mnist.test.images,[10000,28,28,1])
    print ("Test Accuracy:", accuracy.eval({x:xt, y: mnist.test.labels}))

On the 10,000 test images the accuracy was somewhere around 0.97 (it has been a while, so I no longer remember the exact number).

2. Extract the model parameters for verification

import tensorflow as tf
import numpy as np

conv1_deep=32
conv1_size=5

conv2_deep=64
conv2_size=5

# =============================================================================
## read the trained model
reader = tf.train.NewCheckpointReader('./CNN_Model/CNN_Model.ckpt')  # the checkpoint saved in step 1
all_variables = reader.get_variable_to_shape_map()

w1 = reader.get_tensor("layer1/w1")  # name as defined in the TensorFlow graph
w1 = w1.reshape(-1, conv1_deep)      # flatten (5,5,1,32) to 2-D so np.savetxt can write it
b1 = reader.get_tensor("layer1/b1")


w2 = reader.get_tensor("layer2/w2")
w2 = w2.reshape(-1, conv2_deep)      # flatten (5,5,32,64) to 2-D
b2 = reader.get_tensor("layer2/b2")


w3 = reader.get_tensor("layer3/w3")
b3 = reader.get_tensor("layer3/b3")

w4 = reader.get_tensor("layer4/w4")
b4 = reader.get_tensor("layer4/b4")

w5 = reader.get_tensor("layer5/w5")
b5 = reader.get_tensor("layer5/b5")
# =============================================================================


# =============================================================================
# write the parameters to text files
# fmt='%0.9f': decimal float with 9 digits after the decimal point

np.savetxt('w1.txt', w1, fmt='%0.9f')
np.savetxt('b1.txt', b1, fmt='%0.9f')

np.savetxt('w2.txt', w2, fmt='%0.9f')
np.savetxt('b2.txt', b2, fmt='%0.9f')

np.savetxt('w3.txt', w3, fmt='%0.9f')
np.savetxt('b3.txt', b3, fmt='%0.9f')

np.savetxt('w4.txt', w4, fmt='%0.9f')
np.savetxt('b4.txt', b4, fmt='%0.9f')

np.savetxt('w5.txt', w5, fmt='%0.9f')
np.savetxt('b5.txt', b5, fmt='%0.9f')
# =============================================================================


# =============================================================================
## read the data back
# b = np.loadtxt('w1.txt', dtype=np.float32)
# w1 = b.reshape(5, 5, 1, 32)
# print(w1)
# =============================================================================


# =============================================================================
## inspect data type and shape
# print(type(w1))
# print(w1.shape)
# print(w1[0])
# =============================================================================

The tensor names used when extracting must match the names saved in the model, and the arrays have to be reshaped when writing them out and reading them back.
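As a quick sanity check (a minimal sketch appended to the script above; the w1_check name is just for illustration), you can list every tensor stored in the checkpoint to confirm the names, and verify that a weight matrix survives the text round trip:

# print every variable name and shape stored in the checkpoint
for name, shape in sorted(all_variables.items()):
    print(name, shape)

# confirm that writing to text and reading back preserves the values
w1_check = np.loadtxt('w1.txt', dtype=np.float32).reshape(conv1_size, conv1_size, 1, conv1_deep)
print(np.allclose(w1_check, reader.get_tensor("layer1/w1")))  # expect True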

3. Implement the LeNet-5 network with hand-written functions and verify the forward pass

Hand-written functions implement the 4-D convolution, 4-D max pooling, softmax and the other building blocks, avoiding NumPy built-ins as far as possible, so that LeNet-5 is implemented close to the bottom and its forward pass can be verified against the trained parameters; the main goal is to understand how a convolutional neural network actually works.

# LeNet-5: [conv - pool] - [conv - pool] - fully connected - fully connected - fully connected

# imports
# from numpy import *
import numpy as np
import cv2  # Pillow would work for the image operations as well
from struct import unpack  # parses binary data: struct.unpack(fmt, string) turns a byte string into Python values


#========================load and preprocess the dataset=======================================
def __read_image(path):
    with open(path, 'rb') as f:
        magic, num, rows, cols = unpack('>4I', f.read(16))
        # parse the file header: magic number, number of images, rows and columns per image
        # each header field is a big-endian 32-bit unsigned integer ('>I', 4 bytes); the image
        # header has four of them (16 bytes), while the label header has only two ('>2I', 8 bytes)
        img = np.fromfile(f, dtype=np.uint8).reshape(num, 784)
    return img


def __read_label(path):
    with open(path, 'rb') as f:
        magic, num = unpack('>2I', f.read(8))
        lab = np.fromfile(f, dtype=np.uint8)
    return lab
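#=============optional: check the IDX magic numbers=======================================
# The first header field of a standard IDX file is its magic number: 2051 for the
# image files and 2049 for the label files.  A quick commented-out check:
# with open('MNIST_data/train-images.idx3-ubyte', 'rb') as _f:
#     print(unpack('>I', _f.read(4))[0])   # expect 2051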


def __normalize_image(image):
    img = image.astype(np.float32) / 255.0
    # each pixel value is 0-255; scale it to the range 0-1
    return img


def __one_hot_label(label):
    # convert the labels to one-hot encoding
    lab = np.zeros((label.size, 10))
    for i, row in enumerate(lab):
        row[label[i]] = 1
    return lab


def load_mnist(normalize=True, one_hot=True):
    '''Read the MNIST dataset.
    Parameters
    ----------
    normalize : scale the pixel values to the range 0.0-1.0
    one_hot :
        if True, the labels are returned as one-hot arrays,
        i.e. arrays such as [0,0,1,0,0,0,0,0,0,0]
    Returns
    ----------
    (training images, training labels), (test images, test labels)
    '''
    train_image_path = 'MNIST_data/train-images.idx3-ubyte'
    train_label_path = 'MNIST_data/train-labels.idx1-ubyte'
    test_image_path = 'MNIST_data/t10k-images.idx3-ubyte'
    test_label_path = 'MNIST_data/t10k-labels.idx1-ubyte'
    image = {
        'train': __read_image(train_image_path),
        'test': __read_image(test_image_path)
    }

    label = {
        'train': __read_label(train_label_path),
        'test': __read_label(test_label_path)
    }

    if normalize:
        for key in ('train', 'test'):
            image[key] = __normalize_image(image[key])

    if one_hot:
        for key in ('train', 'test'):
            label[key] = __one_hot_label(label[key])

    return (image['train'], label['train']), (image['test'], label['test'])

#==================softmax===============================================================
def softmax(f):
    # subtract the per-row maximum for numerical stability, then normalize each row
    f = f - np.max(f, axis=-1, keepdims=True)
    return np.exp(f) / np.sum(np.exp(f), axis=-1, keepdims=True)
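#=============optional: quick check of the batched softmax================================
# Each row of the output should sum to 1 (purely illustrative input):
# _p = softmax(np.random.randn(3, 10))
# print(_p.sum(axis=-1))   # expect [1. 1. 1.]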



#======================activation function==============================================================
def relu(z):
    return np.maximum(0, z)
    # in-place alternative:
    # z[z < 0] = 0
    # return z



#=========================max pooling======================================================
def max_pooling(x, pooling, strides=2, padding=0):
    """
    :param x: input feature map of shape (N, H, W, C); N is the batch size, C the number of channels
    :param pooling: pooling window size (pooling x pooling)
    :param strides: stride
    :param padding: amount of zero padding (0 here, since 28 and 14 divide evenly by 2)
    :return: pooled feature map of shape (N, out_h, out_w, C)
    """
    N, H, W, C = x.shape
    # zero padding
    padding_z = np.zeros((N, H + 2 * padding, W + 2 * padding, C))  # padded shape
    padding_z[:, padding:padding + H, padding:padding + W, :] = x   # padded x
    # output height and width
    out_h = (H + 2 * padding - pooling) // strides + 1
    out_w = (W + 2 * padding - pooling) // strides + 1

    pool_z = np.zeros((N, out_h, out_w, C))
    max_z = np.zeros((N, pooling, pooling, C))
    max_p = np.zeros((N, 1, C))

    for n in range(N):
        for c in range(C):
            for i in range(out_h):
                for j in range(out_w):
                    # initialize the running maximum once per window (with its top-left element);
                    # resetting it inside the p loop would silently ignore part of the window
                    max_p[n, :, c] = padding_z[n, strides * i, strides * j, c]
                    for p in range(pooling):
                        for q in range(pooling):
                            max_z[n, p, q, c] = padding_z[n, strides * i + p, strides * j + q, c]
                            if max_z[n, p, q, c] > max_p[n, :, c]:  # keep the larger value
                                max_p[n, :, c] = max_z[n, p, q, c]
                    pool_z[n, i, j, c] = max_p[n, :, c]
    return pool_z  # (N, out_h, out_w, C)
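#=============optional: sanity-check max_pooling==========================================
# A minimal commented-out cross-check of the loop-based pooling above against a direct
# slice-and-np.max reference on random data (the underscored names are illustrative only):
# _x = np.random.randn(2, 4, 4, 3)
# _ref = np.zeros((2, 2, 2, 3))
# for _i in range(2):
#     for _j in range(2):
#         _ref[:, _i, _j, :] = np.max(_x[:, 2 * _i:2 * _i + 2, 2 * _j:2 * _j + 2, :], axis=(1, 2))
# print(np.allclose(max_pooling(_x, pooling=2, strides=2, padding=0), _ref))  # expect True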




#=============convolution===================================================================
def conv2d(x, w, b, Stride, Pad):
    """
    - x: input of shape (N, H, W, C)
    - w: filters of shape (HH, WW, C, F)
    - b: biases of shape (F,)
    - Stride: step size of the sliding window
    - Pad: number of zero-padding pixels on each side
    - returns: output of shape (N, Ho, Wo, F)
    """
    N, H, W, C = x.shape
    HH, WW, CC, F = w.shape

    Ho = 1 + (H + 2 * Pad - HH) // Stride  # output height
    Wo = 1 + (W + 2 * Pad - WW) // Stride  # output width

    x_pad = np.zeros((N, H + 2 * Pad, W + 2 * Pad, C))  # padded shape
    x_pad[:, Pad:Pad + H, Pad:Pad + W, :] = x            # padded x

    kernel = np.zeros((N, HH, WW, CC))  # element-wise products for the current window
    out = np.zeros((N, Ho, Wo, F))      # output

    for f in range(F):
        for i in range(Ho):
            for j in range(Wo):
                for m in range(HH):
                    for n in range(WW):
                        # multiply the input window element-wise with filter f
                        kernel[:, m, n, :] = x_pad[:, i * Stride + m, j * Stride + n, :] * w[m, n, :, f]
                out[:, i, j, f] = np.sum(kernel, axis=(1, 2, 3))  # sum over the window and the channels
        out[:, :, :, f] += b[f]  # add the bias
    return out  # (N, Ho, Wo, F)
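#=============optional: sanity-check conv2d against TensorFlow============================
# A minimal commented-out cross-check (it assumes TensorFlow 1.x is installed, which this
# pure-Python script otherwise does not need); it compares conv2d above with tf.nn.conv2d
# on random data, where Pad=2 matches 'SAME' padding for a 5x5 kernel at stride 1:
# import tensorflow as tf
# _x = np.random.randn(2, 8, 8, 3).astype(np.float32)
# _w = np.random.randn(5, 5, 3, 4).astype(np.float32)
# _b = np.random.randn(4).astype(np.float32)
# _mine = conv2d(_x, _w, _b, Stride=1, Pad=2)
# with tf.Session() as _sess:
#     _ref = _sess.run(tf.nn.conv2d(_x, _w, strides=[1, 1, 1, 1], padding='SAME') + _b)
# print(np.max(np.abs(_mine - _ref)))   # expect a value close to 0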

#=================parameters===========================================================================
input_node = 784   # 28*28
output_node = 10   # digits 0-9
image_size = 28    # image size in pixels
num_channels = 1   # number of channels
num_labels = 10    # number of labels / classes
conv1_deep = 32    # conv layer C1: depth
conv1_size = 5     # conv layer C1: kernel size
conv2_deep = 64    # conv layer C2: depth
conv2_size = 5     # conv layer C2: kernel size
fc_size1 = 512     # fully connected layers
fc_size2 = 256


#=============load the weights and biases===================================================================
w1 = np.loadtxt('w1.txt', dtype=np.float32)  # conv layer 1 + pooling
w1 = w1.reshape(conv1_size, conv1_size, num_channels, conv1_deep)
b1 = np.loadtxt('b1.txt', dtype=np.float32)

w2 = np.loadtxt('w2.txt', dtype=np.float32)  # conv layer 2 + pooling
w2 = w2.reshape(conv2_size, conv2_size, conv1_deep, conv2_deep)
b2 = np.loadtxt('b2.txt', dtype=np.float32)

w3 = np.loadtxt('w3.txt', dtype=np.float32)  # fully connected 1
b3 = np.loadtxt('b3.txt', dtype=np.float32)

w4 = np.loadtxt('w4.txt', dtype=np.float32)  # fully connected 2
b4 = np.loadtxt('b4.txt', dtype=np.float32)

w5 = np.loadtxt('w5.txt', dtype=np.float32)  # fully connected 3
b5 = np.loadtxt('b5.txt', dtype=np.float32)




#=====================load the full test set==========================================================
(x_train, y_train), (x_test, y_test) = load_mnist(normalize=True, one_hot=True)
x_test = np.reshape(x_test, [-1, 28, 28, 1])
# note: the loop-based convolution is slow on all 10,000 images; while experimenting you
# may want to start with a slice such as x_test[:100] (and the matching y_test slice)





#=====================single input image===========================================================
# img = cv2.imread("./9.BMP", -1)
# img = np.array(img) / 255
# img = np.expand_dims(img, axis=0)    # add a batch dimension
# x_test = np.expand_dims(img, axis=3) # shape [1, 28, 28, 1]



#======================layer1============================================================
"""conv1.shape=[None 28 28 32]    pool1.shape=[None 14 14 32]"""
conv1 = conv2d(x_test, w1, b1, Stride=1, Pad=2)  # Pad=2 reproduces 'SAME' padding for a 5x5 kernel, so Ho=Wo=28
relu1 = relu(conv1)
pool1 = max_pooling(relu1, pooling=2, strides=2, padding=0)


#========================layer2==========================================================
"""conv2.shape=[None 14 14 64]    pool2.shape=[None 7 7 64]"""
conv2 = conv2d(pool1, w2, b2, Stride=1, Pad=2)
relu2 = relu(conv2)
pool2 = max_pooling(relu2, pooling=2, strides=2, padding=0)
# reshape to the form the fully connected layers expect
"""reshaped.shape=[ None 3136]       nodes=3136"""
# pool_shape = pool2.get_shape().as_list()
pool_shape = pool2.shape
nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]  # length of the flattened feature map
reshaped = np.reshape(pool2, [pool_shape[0], nodes])


#====================layer3================================================================
"""fc1.shape=[ None  512]"""
fc01= np.matmul(reshaped, w3)+b3
fc1=relu(fc01)

#====================layer4================================================================
"""fc2.shape=[ None  256]"""
fc2 = np.matmul(fc1, w4) + b4  # the TensorFlow graph applies no ReLU after this layer, so none is applied here

#=====================layer5==============================================================
"""fc3.shape=[ None  10]"""
fc3= np.matmul(fc2, w5)+b5


#======================softmax=============================================================
y_=softmax(fc3)




#=======================compute the accuracy==========================================================
accuracy = np.mean(np.equal(np.argmax(y_, axis=-1), np.argmax(y_test, axis=-1)))  # axis=-1 is the last axis
print("test accuracy: {}".format(accuracy))





#=======================predict the digit of a single image==========================================================
# print(y_)
# num = np.argmax(y_, axis=1)
# print("the number is:{} ".format(num))
#===========================================================================================

With the same 10,000 test images, the accuracy came out roughly 0.06 lower than the TensorFlow result obtained with the same parameters.
In theory, identical weights and biases should give identical predictions, so a gap like this almost always points to a mismatch between the hand-written forward pass and the trained graph rather than to the parameters themselves. The two usual suspects in code like this are the max-pooling window scan (the running maximum must be initialized once per window, not once per row, otherwise part of every window is silently skipped) and activation functions applied in one implementation but not the other (note that the TensorFlow graph in step 1 applies no ReLU after the second fully connected layer).
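A tiny worked example (with made-up numbers; the variable names are illustrative only) shows how such a pooling mistake loses information: for the window [[1, 9], [3, 2]] the true maximum is 9, but a scan that resets its running maximum at the start of every row returns 3.

import numpy as np

window = np.array([[1., 9.],
                   [3., 2.]])

print(window.max())                   # correct pooled value: 9.0

# faulty scan: the running maximum is reset at the start of every row
result = None
for p in range(2):
    running = window[0, 0]            # reset to the top-left element -- the mistake
    for q in range(2):
        if window[p, q] > running:
            running = window[p, q]
    result = running                  # only the last row's result survives
print(result)                         # 3.0 -- the 9 in the first row is lost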

See below >>
A neural network implemented from scratch using only NumPy, including the derivation of the backpropagation formulas; fully connected, convolutional, pooling and flatten layers built with NumPy; plus an image-classification example and a network fine-tuning example, continuously updated...
