学习笔记：cnn 身份证数字识别

最新推荐文章于 2024-01-15 16:33:11 发布

荣•厚德载物

最新推荐文章于 2024-01-15 16:33:11 发布

阅读量2.9k

点赞数

分类专栏： AI学习记录---DL,CS,CV视觉

本文链接：https://blog.csdn.net/zr940326/article/details/80793046

版权

AI学习记录---DL,CS,CV视觉专栏收录该内容

56 篇文章 2 订阅

订阅专栏

这篇文章与 CNN 猫狗识别的做法差别不大，只是在数据处理和训练部分做了少量调整。数据集和代码可以通过 https://github.com/zr940326/tfCnnID 获取。

1.数据集

图片大小是 28×28 的

代码编写：数据处理，文件名 DataUtils.py

#coding=utf-8
#数据拆分
import math 
import os
import numpy as np 

#导入 tensorflow 
import tensorflow as tf

"""
 数据 打标签 并拆分成 测试和训练数据

"""
# Module-level per-digit storage: for every digit 0-9 there is one list of
# image paths (``<digit>class``) and one list of integer labels.
# NOTE: four of the label lists are spelled ``lable_*``; the spelling is kept
# unchanged for compatibility with code that refers to them by name.
zeroclass, label_zeroclass = [], []      # digit 0
oneclass, label_oneclass = [], []        # digit 1
twoclass, label_twoclass = [], []        # digit 2
threeclass, label_threeclass = [], []    # digit 3
fourclass, label_fourclass = [], []      # digit 4
fiveclass, label_fiveclass = [], []      # digit 5
sixclass, lable_sixclass = [], []        # digit 6
sevenclass, lable_sevenclass = [], []    # digit 7
eightclass, label_eightclass = [], []    # digit 8
nineclass, lable_nineclass = [], []      # digit 9

def get_files(file_dir, ratio):
    """Collect labelled image paths under ``file_dir`` and split them.

    ``file_dir`` must contain one sub-directory per digit, named ``0`` to
    ``9``; every file inside ``<file_dir>/<d>`` is labelled with the integer
    ``d``. The samples are shuffled with ``np.random`` and then split into a
    training part and a validation part.

    The lists are built locally on every call, so calling this function more
    than once does not accumulate duplicate samples (it does not touch any
    module-level list).

    Args:
        file_dir: root directory holding the ``0`` .. ``9`` sub-directories.
        ratio: fraction of the samples used for validation; the validation
            count is ``ceil(n_samples * ratio)``.

    Returns:
        A tuple ``(tra_images, tra_labels, val_images, val_labels)`` where the
        image entries are path strings and the label entries are ints.

    Raises:
        OSError: if one of the ten digit sub-directories cannot be listed.
    """
    image_paths = []
    labels = []
    for digit in range(10):
        class_dir = os.path.join(file_dir, str(digit))
        for file_name in os.listdir(class_dir):
            image_paths.append(os.path.join(class_dir, file_name))
            labels.append(digit)

    # Shuffle paths and labels together through one shared permutation; this
    # keeps the labels as ints instead of round-tripping them through a
    # string array.
    order = np.random.permutation(len(labels))
    all_image_list = [image_paths[i] for i in order]
    all_label_list = [labels[i] for i in order]

    # The tail of the shuffled data becomes the validation set.
    n_sample = len(all_label_list)
    n_val = int(math.ceil(n_sample * ratio))
    n_train = n_sample - n_val

    tra_images = all_image_list[:n_train]
    tra_labels = all_label_list[:n_train]
    val_images = all_image_list[n_train:]
    val_labels = all_label_list[n_train:]
    return tra_images, tra_labels, val_images, val_labels



"""
将图片转为 tensorFlow 能读取的张量
"""
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build a queue-based input pipeline yielding image and label batches.

    Each file path is read, decoded as a 3-channel JPEG, cropped or padded to
    ``image_H`` x ``image_W``, standardised per image and then batched.

    Args:
        image: list of image file paths.
        label: list of integer labels, parallel to ``image``.
        image_W: target image width in pixels.
        image_H: target image height in pixels.
        batch_size: number of samples per batch.
        capacity: capacity of the batching queue.

    Returns:
        A tuple ``(image_batch, label_batch)``: ``image_batch`` is cast to
        float32 and ``label_batch`` is reshaped to ``[batch_size]``.
    """
    # Convert the Python lists into tensors of the dtypes the ops expect.
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # Queue of (path, label) pairs.
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])

    # Decode the file contents into a 3-channel image tensor.
    image = tf.image.decode_jpeg(image_contents, channels=3)

    # resize_image_with_crop_or_pad takes (image, target_height, target_width),
    # so the height has to be passed before the width.
    image = tf.image.resize_image_with_crop_or_pad(image, image_H, image_W)
    # Standardise every image individually.
    image = tf.image.per_image_standardization(image)

    # Group single examples into batches using 64 enqueue threads.
    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=64,
                                              capacity=capacity)

    label_batch = tf.reshape(label_batch, [batch_size])  # flatten to 1-D
    image_batch = tf.cast(image_batch, tf.float32)

    return image_batch, label_batch

2. 网络编写，cnn 网络，文件名 MainModel.py

#coding=utf-8
import tensorflow as tf 

"""
##1.网络推理
"""
def inference(images, batch_size, n_classess):
    """Build the CNN forward graph and return the pre-softmax class scores.

    Layer order: conv1 (3x3, 3->16) + ReLU, 3x3 stride-2 max-pool followed by
    LRN, conv2 (3x3, 16->16) + ReLU, LRN followed by 3x3 stride-1 max-pool,
    two 128-unit ReLU fully connected layers, and a final linear layer.

    Variables are created with ``tf.get_variable`` inside named variable
    scopes so they can be shared / restored by name.

    Args:
        images: input batch tensor; conv1 uses a [3, 3, 3, 16] kernel, so the
            last dimension must hold 3 channels.
        batch_size: number of rows used when flattening the feature maps in
            front of the first fully connected layer.
        n_classess: number of outputs of the final linear layer.

    Returns:
        The ``softmax_linear`` tensor, i.e. logits without softmax applied.
    """
    def truncated_normal(stddev):
        # Weight initializer shared by every layer; only stddev differs.
        return tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32)

    # First convolution: 3x3 kernel, 3 input channels, 16 output channels.
    with tf.variable_scope('conv1') as scope:
        kernel = tf.get_variable('weights',
                                 shape=[3, 3, 3, 16],
                                 dtype=tf.float32,
                                 initializer=truncated_normal(0.1))
        bias = tf.get_variable('biases',
                               shape=[16],
                               dtype=tf.float32,
                               initializer=tf.constant_initializer(0.1))
        feature_map = tf.nn.conv2d(images, kernel, strides=[1, 1, 1, 1], padding='SAME')
        # Add the bias, then apply the ReLU non-linearity.
        conv1 = tf.nn.relu(tf.nn.bias_add(feature_map, bias), name=scope.name)

    # First pooling stage: max-pool, then local response normalization.
    with tf.variable_scope('pooling1_lrn'):
        pool1 = tf.nn.max_pool(conv1,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name='poolong1')
        norm1 = tf.nn.lrn(pool1,
                          depth_radius=4,
                          bias=1,
                          alpha=0.001/9.0,
                          beta=0.75,
                          name='norm1')

    # Second convolution: same computation as conv1 in its own scope.
    with tf.variable_scope('conv2'):
        kernel = tf.get_variable('weights',
                                 shape=[3, 3, 16, 16],
                                 dtype=tf.float32,
                                 initializer=truncated_normal(0.1))
        bias = tf.get_variable('biases',
                               shape=[16],
                               dtype=tf.float32,
                               initializer=tf.constant_initializer(0.1))
        feature_map = tf.nn.conv2d(norm1, kernel, strides=[1, 1, 1, 1], padding='SAME')
        conv2 = tf.nn.relu(tf.nn.bias_add(feature_map, bias), name='conv2')

    # Second pooling stage: normalization first, then a stride-1 max-pool.
    with tf.variable_scope('pooling2_lrn'):
        norm2 = tf.nn.lrn(conv2,
                          depth_radius=4,
                          bias=1,
                          alpha=0.001/9,
                          beta=0.75,
                          name='norm2')
        pool2 = tf.nn.max_pool(norm2,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 1, 1, 1],
                               padding='SAME',
                               name='pooling2')

    # First fully connected layer (128 units).
    with tf.variable_scope('local3') as scope:
        # -1 lets reshape work out the flattened feature size itself.
        flat = tf.reshape(pool2, shape=[batch_size, -1])
        # Column count of the flattened input = row count of the weights.
        n_features = flat.get_shape()[1].value
        fc_weights = tf.get_variable('weights',
                                     shape=[n_features, 128],
                                     dtype=tf.float32,
                                     initializer=truncated_normal(0.005))
        fc_bias = tf.get_variable('biases',
                                  shape=[128],
                                  dtype=tf.float32,
                                  initializer=tf.constant_initializer(0.1))
        affine = tf.matmul(flat, fc_weights) + fc_bias
        local3 = tf.nn.relu(affine, name=scope.name)

    # Second fully connected layer (128 units).
    with tf.variable_scope('local4'):
        fc_weights = tf.get_variable('weights',
                                     shape=[128, 128],
                                     dtype=tf.float32,
                                     initializer=truncated_normal(0.005))
        fc_bias = tf.get_variable('biases',
                                  shape=[128],
                                  dtype=tf.float32,
                                  initializer=tf.constant_initializer(0.1))
        affine = tf.matmul(local3, fc_weights) + fc_bias
        local4 = tf.nn.relu(affine, name='local4')

    # Final linear layer producing one score per class (no activation).
    with tf.variable_scope('softmax_linear'):
        out_weights = tf.get_variable('softmax_linear',
                                      shape=[128, n_classess],
                                      dtype=tf.float32,
                                      initializer=truncated_normal(0.005))
        out_bias = tf.get_variable('biases',
                                   shape=[n_classess],
                                   dtype=tf.float32,
                                   initializer=tf.constant_initializer(0.1))
        softmax_linear = tf.add(tf.matmul(local4, out_weights), out_bias, name='softmax_linear')

    return softmax_linear

"""
##2.定义损失函数，定义传入值和标准值的差距
"""

def losses(logits, labels):
    """Return the mean sparse softmax cross-entropy of ``logits`` vs ``labels``.

    Args:
        logits: network output scores.
        labels: ground-truth class indices.

    Returns:
        A scalar loss tensor; it is also recorded as the ``loss/loss``
        scalar summary.
    """
    with tf.variable_scope('loss') as loss_scope:
        # Per-example cross-entropy with the softmax applied internally.
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=labels,
            name='xentropy_per_example')
        # Average over all examples.
        mean_loss = tf.reduce_mean(per_example, name='loss')
        # Register the value so it can be plotted.
        tf.summary.scalar(loss_scope.name+'/loss', mean_loss)
    return mean_loss


"""
##3.通过梯度下降法为最小化损失函数增加了相关的优化操作
"""

def trainning(loss, learning_rate):
    """Create the op that minimises ``loss`` with an Adam optimizer.

    Args:
        loss: scalar loss tensor to minimise.
        learning_rate: learning rate handed to the Adam optimizer.

    Returns:
        The training op; it is built with a non-trainable ``global_step``
        variable passed to ``minimize``.
    """
    with tf.name_scope('optimizer'):
        adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
        # Non-trainable counter tracking the number of training steps.
        step_counter = tf.Variable(0, name='global_step', trainable=False)
        train_op = adam.minimize(loss, global_step=step_counter)
    return train_op

"""
##4.定义评价函数，返回准确率
"""

def evaluation(logits, labels):
    """Return the top-1 accuracy of ``logits`` against ``labels``.

    Args:
        logits: network output scores.
        labels: ground-truth class indices.

    Returns:
        A scalar float32 tensor with the fraction of correct predictions; it
        is also recorded as the ``accuracy/accuracy`` scalar summary.
    """
    with tf.variable_scope('accuracy') as scope:
        # True where the label is the highest-scoring class (k = 1).
        correct = tf.nn.in_top_k(logits, labels, 1)
        # Use float32 rather than float16 so the mean keeps its precision
        # when it is taken over larger batches.
        correct = tf.cast(correct, tf.float32)
        accuracy = tf.reduce_mean(correct)
        # Register the value so it can be plotted.
        tf.summary.scalar(scope.name+'/accuracy', accuracy)
    return accuracy

3.进行训练和测试

文件名 ModelTraining.py

#coding=utf-8
import os
import numpy as np
import tensorflow as tf

import DataUtils
import MainModel


from PIL import Image  
import matplotlib.pyplot as plt 

# Training configuration.
N_CLASSES = 10          # number of digit classes
capacity = 256          # input queue capacity
BATCH_SIZE = 10         # samples per batch
MAX_STEP = 2000         # maximum number of training steps
IMG_W = IMG_H = 28      # image width and height in pixels
learning_rate = 1e-4    # optimizer learning rate

"""
 定义开始训练的函数
"""
def run_training(train_dir='/home/zhang-rong/Yes/CnnID/train/',
                 logs_train_dir='/home/zhang-rong/Yes/CnnID/log/',
                 train_model_dir='/home/zhang-rong/Yes/CnnID/model/'):
    """Train the CNN and save the final checkpoint as ``model.ckpt``.

    Args:
        train_dir: root directory with the training images.
        logs_train_dir: directory receiving the summary logs.
        train_model_dir: directory receiving the checkpoint; it is created
            when it does not exist yet.

    The loop runs for at most ``MAX_STEP`` steps and, every second step,
    prints loss/accuracy and writes a summary.
    """
    # 1. Data: 20% of the samples are split off for validation.
    tra_list, tra_labels, val_list, val_labels = DataUtils.get_files(train_dir, 0.2)
    tra_list_batch, tra_label_batch = DataUtils.get_batch(
        tra_list, tra_labels, IMG_W, IMG_H, BATCH_SIZE, capacity)
    # NOTE(review): the validation batch is built but never evaluated below;
    # it is kept so the constructed graph stays the same as before.
    val_list_batch, val_label_batch = DataUtils.get_batch(
        val_list, val_labels, IMG_W, IMG_H, BATCH_SIZE, capacity)
    # %-formatting prints the same text under Python 2 and Python 3.
    print('%s ****** %s' % (val_list, val_labels))

    # 2. Forward pass.
    train_logits = MainModel.inference(tra_list_batch, BATCH_SIZE, N_CLASSES)

    # 3. Loss and optimizer.
    train_loss = MainModel.losses(train_logits, tra_label_batch)
    train_op = MainModel.trainning(train_loss, learning_rate)

    # 4. Accuracy and merged summaries.
    train__acc = MainModel.evaluation(train_logits, tra_label_batch)
    summary_op = tf.summary.merge_all()

    sess = tf.Session()
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    try:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())

        # The coordinator manages the queue-runner threads started below.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            for step in np.arange(MAX_STEP):
                if coord.should_stop():
                    break

                if step % 2 == 0:
                    # Fetch the summary in the same run as the training op so
                    # it describes the batch that is printed, instead of
                    # dequeuing an extra batch in a second run.
                    _, tra_loss, tra_acc, summary_str = sess.run(
                        [train_op, train_loss, train__acc, summary_op])
                    print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc * 100.0))
                    train_writer.add_summary(summary_str, step)
                else:
                    sess.run(train_op)
        except tf.errors.OutOfRangeError:
            # Raised when the input queue is exhausted.
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()       # ask every queue thread to stop

        coord.join(threads)            # wait for the threads to finish

        # Make sure the target directory exists before saving.
        if not os.path.isdir(train_model_dir):
            os.makedirs(train_model_dir)
        # Saved without a step suffix: evaluate_one_image restores the file
        # named exactly "model.ckpt".
        checkpoint_path = os.path.join(train_model_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path)
    finally:
        # Release the writer and the session even when training fails.
        train_writer.close()
        sess.close()


def get_one_image_file(img_dir, size=(28, 28)):
    """Load one image file as an RGB array of the requested size.

    Args:
        img_dir: path of the image file to open.
        size: ``(width, height)`` to resize to; defaults to 28 x 28.

    Returns:
        A numpy array of shape ``(height, width, 3)``.
    """
    image = Image.open(img_dir)
    plt.imshow(image)   # draw the image as loaded
    # Force three channels: the caller reshapes the result to
    # [1, 28, 28, 3], which fails for grayscale or RGBA files.
    image = image.convert('RGB')
    image = image.resize(size)
    return np.array(image)


"""
进行单张图片的测试
"""
def evaluate_one_image(img_path="/home/zhang-rong/Yes/CnnID/test_yes/2.jpg",
                       model_dir='/home/zhang-rong/Yes/CnnID/model/'):
    """Classify a single image with the saved model and print the result.

    Args:
        img_path: path of the image to classify.
        model_dir: directory containing the ``model.ckpt`` checkpoint.

    Returns:
        The index of the class with the highest probability.
    """
    image_array = get_one_image_file(img_path)

    with tf.Graph().as_default():
        batch_size = 1   # a single image
        n_classes = 10   # ten digit classes

        # Build the graph on top of the placeholder so that the array fed at
        # run time is the data that actually flows through the network.
        x = tf.placeholder(tf.float32, shape=[28, 28, 3])
        image = tf.image.per_image_standardization(x)
        # inference expects 4-D input, so add the batch dimension.
        image = tf.reshape(image, [1, 28, 28, 3])
        logit = MainModel.inference(image, batch_size, n_classes)
        # inference returns raw scores; apply softmax to get probabilities.
        logit = tf.nn.softmax(logit)

        saver = tf.train.Saver()

        with tf.Session() as sess:
            saver.restore(sess, os.path.join(model_dir, "model.ckpt"))

            prediction = sess.run(logit, feed_dict={x: image_array})
            # Index of the most probable class.
            max_index = np.argmax(prediction)
            print(max_index)

    return max_index

                

"""
主函数
"""
def main():
    """Script entry point: classify one test image.

    Training is disabled here; uncomment the call below to train first.
    """
    # run_training()
    evaluate_one_image()


if __name__ == "__main__":
    main()

附带一张目录结构图片