Cat vs. Dog Classification with TensorFlow (LeNet-5)

This article is based on material provided by the author of an online video tutorial. The task is a binary classification problem, written up in the numbered parts below:

1. Data processing, i.e. labeling the images: cat = 0, dog = 1

#%%
import tensorflow as tf
import numpy as np
import os

# 
img_width = 208
img_height = 208


#%% Get image file paths and generate labels
train_dir = 'G:/tensorflow/cats_vs_dogs/data/train/'

def get_files(file_dir):
    '''
    Args:
        file_dir: file directory
    Returns:
        list of images and labels
    '''
    cats = []
    label_cats = []
    dogs = []
    label_dogs = []
    for file in os.listdir(file_dir): # list every file in the directory
        name = file.split('.') # split the file name; e.g. 'cat.0.jpg' -> ['cat', '0', 'jpg']
        if name[0] == 'cat':
            cats.append(file_dir + file)
            label_cats.append(0)
        else:
            dogs.append(file_dir + file)
            label_dogs.append(1)
    print('There are %d cats \nThere are %d dogs' %(len(cats), len(dogs)))
    
    image_list = np.hstack((cats, dogs)) # stack all image paths into one array
    label_list = np.hstack((label_cats, label_dogs)) # stack the labels in the same order
    
    temp = np.array([image_list, label_list]) # pair each file path with its label
    temp = temp.transpose() # transpose so each row is (image_path, label)
    np.random.shuffle(temp) # shuffle the rows
    
    # combining before shuffling keeps each image matched with its label after the shuffle
    image_list = list(temp[:, 0]) # shuffled image paths
    label_list = list(temp[:, 1]) # shuffled labels (strings at this point)
    label_list = [float(i) for i in label_list] # convert the label strings back to numbers
    
    return image_list, label_list

#%%
# Resize the images and assemble them into batches
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    '''
    Args:
        image: list type
        label: list type
        image_W: image width
        image_H: image height
        batch_size: number of images per batch
        capacity: the maximum number of elements in the queue
    Returns:
        image_batch: 4D tensor [batch_size, width, height, 3], dtype=tf.float32
        label_batch: 1D tensor [batch_size], dtype=tf.float32
    '''
    # cast the python lists to tensors
    image = tf.cast(image, tf.string) # image paths -> string
    label = tf.cast(label, tf.int32)  # labels -> int32
    
    # make an input queue
    input_queue = tf.train.slice_input_producer([image, label])
    
    label = input_queue[1] # the label element of the queue
    image_contents = tf.read_file(input_queue[0]) # read the raw image file as a string
    image = tf.image.decode_jpeg(image_contents, channels = 3) # decode the JPEG

    ########################################
    # data augmentation should go here
    ########################################
    # crop or pad the image at its center so that every image has the same size
    image = tf.image.resize_image_with_crop_or_pad(image, image_W, image_H)
    # standardize each image (zero mean, adjusted unit variance) before training
    image = tf.image.per_image_standardization(image) 
    # assemble batches from the input tensors
    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size = batch_size,
                                              num_threads = 64,
                                              capacity = capacity) 
    # reshape label_batch into a 1D tensor of shape [batch_size]
    label_batch = tf.reshape(label_batch, [batch_size]) 
    
    return image_batch, label_batch

Details:

Step 1: Label and shuffle

        1) Read the file names with os.listdir and assign each image a label based on its name; 2) stack the image paths (strings) and the labels (ints) separately, in matching order, so every label still corresponds to its image; 3) combine images and labels with numpy.array() and shuffle them together with numpy.random.shuffle(), which keeps the training order from following the file layout and helps prevent overfitting; 4) read the images and labels back out of the shuffled array and convert the labels to floats. A minimal sketch of this pairing-and-shuffling trick is shown below.
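A minimal, self-contained sketch of the pairing-and-shuffling step on its own (plain numpy, with a few hypothetical file names used only for illustration):

import numpy as np

# hypothetical file names and labels, just to illustrate the shuffle
image_list = ['cat.0.jpg', 'cat.1.jpg', 'dog.0.jpg', 'dog.1.jpg']
label_list = [0, 0, 1, 1]

temp = np.array([image_list, label_list])  # shape (2, N); note that everything becomes a string
temp = temp.transpose()                    # shape (N, 2): one (image_path, label) row per sample
np.random.shuffle(temp)                    # shuffling whole rows keeps each pair intact

image_list = list(temp[:, 0])
label_list = [float(i) for i in temp[:, 1]]
print(list(zip(image_list, label_list)))   # pairs are still correctly matched after the shuffle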

Step 2: Build the batches

        1) Convert the data types: image paths to string, labels to int32; 2) create an input queue with tf.train.slice_input_producer(); 3) read the label and the image from the queue, decoding the image with tf.image.decode_jpeg(); 4) crop or pad the decoded image to a uniform size (tf.image.resize_image_with_crop_or_pad) and standardize it (tf.image.per_image_standardization); 5) assemble the batches with tf.train.batch(). A rough sketch of what the standardization step does is given below.
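For reference, tf.image.per_image_standardization rescales each image to zero mean and (adjusted) unit variance. A rough numpy equivalent, meant only to show the idea rather than to reproduce the exact TensorFlow implementation:

import numpy as np

def standardize(image):
    # roughly (x - mean) / max(stddev, 1 / sqrt(number of elements)),
    # which is what per_image_standardization computes for each image
    image = image.astype(np.float32)
    adjusted_std = max(image.std(), 1.0 / np.sqrt(image.size))
    return (image - image.mean()) / adjusted_std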

You can test the input pipeline at this point:

import matplotlib.pyplot as plt

BATCH_SIZE = 5 # number of images per batch
CAPACITY = 256 # maximum number of elements held in the queue
IMG_W = 208
IMG_H = 208

train_dir = 'data/train/'

image_list, label_list = get_files(train_dir)
image_batch, label_batch = get_batch(image_list, label_list, IMG_W, IMG_H,
                                    BATCH_SIZE, CAPACITY)

with tf.Session() as sess:
    print("start")
    i = 0
    # start monitoring the input queue
    coord = tf.train.Coordinator() # coordinates the queue-runner threads
    threads = tf.train.start_queue_runners(coord = coord) # start the enqueue threads
    
    try:
        while not coord.should_stop() and i<1:
            
            img, label = sess.run([image_batch, label_batch]) # fetch one batch as numpy arrays
            
            # just test one batch
            # np.arange returns an array of indices into the batch
            for j in np.arange(BATCH_SIZE):
                print('label: %d'%label[j])
                plt.imshow(img[j,:,:,:]) 
                plt.show()
            i += 1
    except tf.errors.OutOfRangeError:
        print('done!')
    finally:
        print('finished')
        coord.request_stop() # ask the other threads to stop
    coord.join(threads) # returns only after the other threads have finished

2. The model

    A convolutional neural network written following the structure of LeNet-5.

 

import tensorflow as tf

#%%
def inference(images , batch_size, n_classes):
    '''Build the model
    Args:
        images: image batch, 4D tensor, tf.float32, [batch_size, width, height, channels]
        batch_size: number of images per batch
        n_classes: number of classes (2 for this binary task)
    Returns:
        output tensor with the computed logits, float, [batch_size, n_classes]
    '''
    # conv1, shape = [kernel size, kernel size, channels, kernel numbers]
    with tf.variable_scope('conv1') as scope:
        weights = tf.get_variable('weights', 
                                  shape = [3, 3, 3, 16], # 16 kernels; the kernel count affects the achievable accuracy
                                  dtype = tf.float32,
                                  initializer = tf.truncated_normal_initializer(stddev = 0.1,
                                                                                dtype = tf.float32))
        biases = tf.get_variable('biases', 
                                 shape = [16],  
                                 dtype = tf.float32,
                                 initializer = tf.constant_initializer(0.1)) # biases initialized to 0.1
        conv = tf.nn.conv2d(images, weights, strides = [1, 1, 1, 1], padding = 'SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name = scope.name)
        
    #pool1 and norm1
    with tf.variable_scope('pooling1_lrn') as scope:
        pool1 = tf.nn.max_pool(conv1, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1], # commonly used values
                               padding = 'SAME', name = 'pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius = 4, bias = 1.0, alpha = 0.001/9.0, # local response normalization, standard parameters
                          beta = 0.75, name = 'norm1')
 
    # conv2 
    with tf.variable_scope('conv2') as scope:
        weights = tf.get_variable('weights', shape = [3, 3, 16, 16],
                                  dtype = tf.float32,
                                  initializer = tf.truncated_normal_initializer(stddev = 0.1,
                                                                                dtype = tf.float32))
        biases = tf.get_variable('biases', 
                                 shape = [16], 
                                 dtype = tf.float32,
                                 initializer = tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(norm1,weights, strides = [1, 1, 1, 1], padding = 'SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name = 'conv2')
        
    # pool2 and norm2
    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(conv2, depth_radius = 4, bias = 1.0, alpha = 0.001/9.0,
                          beta = 0.75, name = 'norm2')
        pool2 = tf.nn.max_pool(norm2, ksize = [1, 3, 3, 1], strides = [1, 1, 1, 1],
                               padding = 'SAME', name = 'pooling2')
        
    # local3
    with tf.variable_scope('local3') as scope:
        reshape = tf.reshape(pool2, shape = [batch_size, -1]) # flatten each sample into a vector
        dim = reshape.get_shape()[1].value
        weights = tf.get_variable('weights',
                                  shape = [dim, 128],  # 128 units in this fully connected layer
                                  dtype = tf.float32,
                                  initializer = tf.truncated_normal_initializer(stddev = 0.005,
                                                                                dtype = tf.float32))

        biases = tf.get_variable('biases',
                                shape = [128],
                                dtype = tf.float32,
                                initializer = tf.constant_initializer(0.1))  
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name = scope.name)
        
    # local4
    with tf.variable_scope('local4') as scope:
        weights = tf.get_variable('weights',
                                  shape = [128, 128], 
                                  dtype = tf.float32,
                                  initializer = tf.truncated_normal_initializer(stddev = 0.005,
                                                                                dtype = tf.float32))
        biases = tf.get_variable('biases',
                                 shape = [128], 
                                 dtype = tf.float32,
                                 initializer = tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name = 'local4')
        
    # softmax
    with tf.variable_scope('softmax_linear') as scope:
        weights = tf.get_variable('softmax_linear',
                                  shape = [128, n_classes],  # n_classes = 2 for binary classification
                                  dtype = tf.float32,
                                  initializer = tf.truncated_normal_initializer(stddev = 0.005,
                                                                                dtype = tf.float32))
        biases = tf.get_variable('biases',
                                 shape = [n_classes], 
                                 dtype = tf.float32,
                                 initializer = tf.constant_initializer(0.1))
        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name = 'softmax_linear')
        
    return softmax_linear
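With the 208x208 inputs used in this article, the tensor shapes work out roughly as follows (a sanity-check sketch; the flattened dimension is computed at runtime from reshape.get_shape()):

# conv1 (3x3x3 -> 16, stride 1, SAME padding):  [batch_size, 208, 208, 16]
# pool1 (ksize 3, stride 2, SAME padding):      [batch_size, 104, 104, 16]
# conv2 (3x3x16 -> 16, stride 1, SAME padding): [batch_size, 104, 104, 16]
# pool2 (ksize 3, stride 1, SAME padding):      [batch_size, 104, 104, 16]
# local3 input dim: 104 * 104 * 16 = 173056, output 128
# local4: 128 -> 128; softmax_linear: 128 -> n_classes

Note that softmax_linear returns raw logits; the softmax itself is applied inside the loss below (sparse_softmax_cross_entropy_with_logits) and applied explicitly again at prediction time in part 4.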

#%% 
def losses(logits, labels):
    with tf.variable_scope('loss') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits\
                (logits = logits, labels = labels, name = 'xentropy_per_example')
        loss = tf.reduce_mean(cross_entropy, name = 'loss')
        tf.summary.scalar(scope.name+'/loss', loss)
    
    return loss

#%% Training optimizer
def training(loss, learning_rate):
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
        global_step = tf.Variable(0, name = 'global_step', trainable = False)
        train_op = optimizer.minimize(loss, global_step = global_step)
        
    return train_op

#%%
def evaluation(logits, labels):
    with tf.variable_scope('accuracy') as scope:
        correct = tf.nn.in_top_k(logits, labels, 1) # true where the top-1 prediction matches the label
        correct = tf.cast(correct, tf.float16)
        accuracy = tf.reduce_mean(correct)
        tf.summary.scalar(scope.name+'/accuracy', accuracy)
    return accuracy
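tf.nn.in_top_k(logits, labels, 1) returns one boolean per example, true when the true label is the top-scoring class; averaging the cast values gives the batch accuracy. A tiny standalone example with made-up logits:

import tensorflow as tf

logits = tf.constant([[2.0, 0.5],   # example 0: class 0 scores highest, label is 0 -> correct
                      [0.3, 0.1]])  # example 1: class 0 scores highest, label is 1 -> wrong
labels = tf.constant([0, 1])
correct = tf.nn.in_top_k(logits, labels, 1)              # [True, False]
accuracy = tf.reduce_mean(tf.cast(correct, tf.float16))  # 0.5
with tf.Session() as sess:
    print(sess.run([correct, accuracy]))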

3. Training the model

import os
import numpy as np
import tensorflow as tf
import import_data
import model

#%%
N_CLASSES = 2
IMG_W = 208
IMG_H = 208
BATCH_SIZE = 16
CAPACITY = 2000 # capacity of the input queue
MAX_STEP = 15000
learning_rate = 0.0001 # keep this below 0.001
   
#%% Build the training graph and train

train_dir = 'data/train/'      # folder with the training images
logs_train_dir = 'logs/train/' # folder where training logs and checkpoints are saved

train, train_label = import_data.get_files(train_dir)

train_batch, train_label_batch = import_data.get_batch(train,
                                                         train_label,
                                                         IMG_W,
                                                         IMG_H,
                                                         BATCH_SIZE,
                                                         CAPACITY)
train_logits = model.inference(train_batch, BATCH_SIZE, N_CLASSES)
train_loss = model.losses(train_logits, train_label_batch)
train_op = model.training(train_loss, learning_rate)
train_acc = model.evaluation(train_logits, train_label_batch)

summary_op = tf.summary.merge_all()
sess = tf.Session()
train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
saver = tf.train.Saver()

sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess = sess, coord = coord)

# start training
try:
    for step in np.arange(MAX_STEP):
        if coord.should_stop():
            break
        _, tra_loss, tra_acc = sess.run([train_op, train_loss, train_acc])
        
        if step % 50 == 0:
            print('Step %d, train loss = %.2f, train accuracy = %.2f%%' %(step, tra_loss, tra_acc))
            summary_str = sess.run(summary_op)
            train_writer.add_summary(summary_str, step)
            
        if step % 2000 == 0 or (step + 1) == MAX_STEP:
            checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step = step)
            
except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')
finally:
    coord.request_stop()
    
coord.join(threads)
sess.close()
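While training runs (or afterwards), the loss and accuracy summaries written to logs/train/ can be inspected with TensorBoard, e.g. by running tensorboard --logdir=logs/train/ and opening the printed URL in a browser.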

4. Load the saved model and classify an input image

#%% Evaluate one image
import model
import import_data
import numpy as np
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt

def get_one_image(train):
    n = len(train)
    ind = np.random.randint(0,n)
    img_dir = train[ind]
    
    image = Image.open(img_dir)
    plt.imshow(image)
    image = image.resize([208, 208])
    image = np.array(image)
    
    return image

    
train_dir = 'data/train/'
train, train_label = import_data.get_files(train_dir)
image_array = get_one_image(train)

with tf.Graph().as_default():
    BATCH_SIZE = 1
    N_CLASSES = 2
    
    image = tf.cast(image_array, tf.float32) # cast the image to float32
    image = tf.reshape(image, [1, 208, 208, 3]) # reshape into a batch of one image
    logit = model.inference(image, BATCH_SIZE, N_CLASSES) # build the same graph as in training
    logit = tf.nn.softmax(logit) # convert the logits to probabilities
    
    # note: this placeholder is not actually wired into the graph above;
    # logit is computed directly from image_array, so feeding x below has no effect
    x = tf.placeholder(tf.float32, shape = [208, 208, 3])
    
    logs_train_dir = 'logs/train/'
    
    saver = tf.train.Saver()
    
    # restore the trained model
    with tf.Session() as sess:
        print("Reading checkpoints...")
        ckpt = tf.train.get_checkpoint_state(logs_train_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # checkpoint files are named like model.ckpt-2000; split on '-' to get the step number
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            saver.restore(sess, ckpt.model_checkpoint_path) # restore the model weights
            print('Loading success, global_step is %s' % global_step)
        else:
            print("No checkpoint file found")
            
        prediction = sess.run(logit, feed_dict = {x: image_array})
        max_index = np.argmax(prediction) # index of the most probable class
        if max_index == 0:
            print('This is a cat with probability %.6f' %prediction[:, 0])
        else:
            print('This is a dog with probability %.6f' %prediction[:, 1])

 
