猫狗识别

数据集

训练集包含 12500 张猫图片和 12500 张狗图片,全部放在同一个目录下,文件名形如 `cat.<编号>.jpg` / `dog.<编号>.jpg`(例如 `dog.9981.jpg`),数据集格式如下:

 

效果

 

 

 

input_data.py

import os

import tensorflow as tf

from PIL import Image

import matplotlib.pyplot as plt

import numpy as np

import cv2

 

def get_files(file_dir):
    """Collect cat/dog image paths from a directory and return them shuffled.

    Each entry is classified by the part of its name before the first dot
    (``dog.9981.jpg`` -> ``dog``).  Cats get label 0, dogs get label 1.
    Entries whose prefix is neither ``cat`` nor ``dog`` are ignored instead
    of being silently counted as dogs.

    Args:
        file_dir: Directory holding the images.  A trailing path separator
            is optional.

    Returns:
        A tuple ``(image_list, label_list)`` of equal length: the image
        paths (``str``) and their integer labels, shuffled in unison.
    """
    cats = []
    dogs = []
    for file in os.listdir(file_dir):
        # 'dog.9981.jpg'.split('.') -> ['dog', '9981', 'jpg']
        prefix = file.split(sep='.')[0]
        # os.path.join only inserts a separator when file_dir lacks one, so
        # both 'data/train' and 'data/train/' produce valid paths.
        path = os.path.join(file_dir, file)
        if prefix == 'cat':
            cats.append(path)
        elif prefix == 'dog':
            dogs.append(path)
        # Any other entry is not a training sample and is skipped.

    print("There are %d cats\nThere are %d dogs" % (len(cats), len(dogs)))

    image_list = cats + dogs
    label_list = [0] * len(cats) + [1] * len(dogs)

    # Shuffle one index array and apply it to both lists so every path keeps
    # its label; this also avoids round-tripping the labels through strings.
    order = np.arange(len(image_list))
    np.random.shuffle(order)
    image_list = [image_list[i] for i in order]
    label_list = [label_list[i] for i in order]

    return image_list, label_list

 

 

# 生成相同大小的批次

def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build a queue-based input pipeline that yields fixed-size batches.

    Args:
        image: List of image file paths.
        label: List of integer labels, parallel to ``image``.
        image_W: Target image width in pixels.
        image_H: Target image height in pixels.
        batch_size: Number of images per batch.
        capacity: Capacity passed to the batching queue.

    Returns:
        A tuple ``(image_batch, label_batch)``: a float32 tensor of shape
        ``[batch_size, image_H, image_W, 3]`` and an int32 tensor of shape
        ``[batch_size]``.
    """
    # Turn the Python lists into tensors that the input producer can slice.
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # The producer emits one (path, label) pair at a time.
    input_queue = tf.train.slice_input_producer([image, label])

    image_contents = tf.read_file(input_queue[0])
    label = input_queue[1]
    # The training data is JPEG; other formats need their own decoder.
    # decode_jpeg yields uint8, while conv2d needs float32 (cast below).
    image = tf.image.decode_jpeg(image_contents, channels=3)

    # Bring every image to the same size by scaling with nearest-neighbour
    # interpolation.  The crop/pad alternative would be
    # tf.image.resize_image_with_crop_or_pad(image, image_H, image_W)
    # (note the height-first argument order).
    image = tf.image.resize_images(image, [image_H, image_W],
                                   method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                                   align_corners=False)
    # No per-image standardization is applied: the network is fed the raw
    # pixel values converted to float32.
    image = tf.cast(image, tf.float32)

    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=64,  # reader threads
                                              capacity=capacity)
    # Pin the static shape of the label batch to [batch_size].
    label_batch = tf.reshape(label_batch, [batch_size])

    return image_batch, label_batch

model.py

#coding=utf-8 

import tensorflow as tf 

# 结构

# conv1   卷积层 1

# pooling1_lrn  池化层 1

# conv2  卷积层 2

# pooling2_lrn 池化层 2

# local3 全连接层 1

# local4 全连接层 2

# softmax 全连接层 3

 

def inference(images, batch_size, n_classes):
    """Build the forward pass of a small CNN and return its raw class scores.

    Architecture: conv1 -> max-pool + LRN -> conv2 -> LRN + max-pool ->
    fully connected (128) -> fully connected (128) -> linear output layer.

    Variable and scope names are part of the checkpoint format, so they must
    not be renamed without retraining.

    Args:
        images: Float32 image batch; the first convolution expects 3 input
            channels.
        batch_size: Number of images in ``images``; used to flatten the
            feature maps before the fully connected layers.
        n_classes: Number of output classes.

    Returns:
        Tensor of shape ``[batch_size, n_classes]`` holding un-normalized
        scores (no softmax is applied here).
    """
    # Convolution layer 1:
    # 16 kernels of 3x3 over 3 input channels, stride 1, padding='SAME',
    # followed by ReLU.
    with tf.variable_scope('conv1') as scope:
        # truncated_normal draws from a normal distribution with the given
        # standard deviation (stddev), re-drawing values that fall more than
        # two standard deviations from the mean.
        weights = tf.Variable(tf.truncated_normal(shape=[3, 3, 3, 16], stddev=0.1, dtype=tf.float32),
                              name='weights', dtype=tf.float32)
        # Biases start at the constant 0.1.
        biases = tf.Variable(tf.constant(value=0.1, dtype=tf.float32, shape=[16]),
                             name='biases', dtype=tf.float32)
        # conv2d arguments: input batch, kernel, strides, padding mode.
        conv = tf.nn.conv2d(images, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
    # Pooling layer 1:
    # 3x3 max pooling with stride 2, followed by local response
    # normalization (LRN).
    with tf.variable_scope('pooling1_lrn') as scope:
        # max_pool arguments: input, window size, strides, padding mode.
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
    # Convolution layer 2:
    # 16 kernels of 3x3 over 16 input channels, stride 1, padding='SAME',
    # followed by ReLU.
    with tf.variable_scope('conv2') as scope:
        weights = tf.Variable(tf.truncated_normal(shape=[3, 3, 16, 16], stddev=0.1, dtype=tf.float32),
                              name='weights', dtype=tf.float32)
        biases = tf.Variable(tf.constant(value=0.1, dtype=tf.float32, shape=[16]),
                             name='biases', dtype=tf.float32)

        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name='conv2')
    # Pooling layer 2:
    # here LRN runs first and the 3x3 max pooling second.  The pooling
    # stride is 1, so this stage does not reduce the spatial size.
    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='SAME', name='pooling2')
    # Fully connected layer (local3):
    # flatten each example's feature maps into one row, then 128 ReLU units.
    with tf.variable_scope('local3') as scope:
        reshape = tf.reshape(pool2, shape=[batch_size, -1])
        # Statically known flattened feature count per example.
        dim = reshape.get_shape()[1].value
        weights = tf.Variable(tf.truncated_normal(shape=[dim, 128], stddev=0.005, dtype=tf.float32),
                              name='weights', dtype=tf.float32)
        biases = tf.Variable(tf.constant(value=0.1, dtype=tf.float32, shape=[128]),
                             name='biases', dtype=tf.float32)
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
    # Fully connected layer (local4):
    # 128 ReLU units.
    with tf.variable_scope('local4') as scope:
        weights = tf.Variable(tf.truncated_normal(shape=[128, 128], stddev=0.005, dtype=tf.float32),
                              name='weights', dtype=tf.float32)
        biases = tf.Variable(tf.constant(value=0.1, dtype=tf.float32, shape=[128]),
                             name='biases', dtype=tf.float32)
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')

    # Dropout is currently disabled:
    #    with tf.variable_scope('dropout') as scope:
    #        drop_out = tf.nn.dropout(local4, 0.8)

    # Output layer:
    # a linear map from the 128 features to one score per class.
    # NOTE(review): the weight variable is named 'softmax_linear' rather
    # than 'weights'; existing checkpoints store it under that name.
    with tf.variable_scope('softmax_linear') as scope:
        weights = tf.Variable(tf.truncated_normal(shape=[128, n_classes], stddev=0.005, dtype=tf.float32),
                              name='softmax_linear', dtype=tf.float32)
        biases = tf.Variable(tf.constant(value=0.1, dtype=tf.float32, shape=[n_classes]),
                             name='biases', dtype=tf.float32)
        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name='softmax_linear')
    return softmax_linear

 

# -----------------------------------------------------------------------------

# loss计算

# 传入参数:logits,网络计算输出值。labels,真实值,在这里是0或者1

# 返回参数:loss,损失值

 

def losses(logits, labels):
    """Compute the mean sparse softmax cross-entropy loss.

    Also registers a scalar summary of the loss.

    Args:
        logits: Raw class scores produced by the network.
        labels: Integer class ids (0 or 1 in this project).

    Returns:
        Scalar tensor holding the loss averaged over the batch.
    """
    with tf.variable_scope('loss') as scope:
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels,
            logits=logits,
            name='xentropy_per_example',
        )
        mean_loss = tf.reduce_mean(per_example, name='loss')
        summary_tag = scope.name + '/loss'
        tf.summary.scalar(summary_tag, mean_loss)
        return mean_loss

 

# --------------------------------------------------------------------------

# loss损失值优化

# 输入参数:loss。learning_rate,学习速率。

# 返回参数:train_op,训练op,这个参数要输入sess.run中让模型去训练。

def trainning(loss, learning_rate):
    """Create the op that performs one Adam optimization step.

    Args:
        loss: Scalar loss tensor to minimize.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The training op; run it in a session to update the model and
        increment the non-trainable ``global_step`` counter.
    """
    with tf.name_scope('optimizer'):
        adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
        step_counter = tf.Variable(0, name='global_step', trainable=False)
        return adam.minimize(loss, global_step=step_counter)

 

# -----------------------------------------------------------------------

 

# 评价/准确率计算

 

# 输入参数:logits,网络计算值。labels,标签,也就是真实值,在这里是0或者1。

 

# 返回参数:accuracy,当前step的平均准确率,也就是在这些batch中多少张图片被正确分类了。

 

def evaluation(logits, labels):
    """Compute the fraction of examples in the batch classified correctly.

    Also registers a scalar summary of the accuracy.

    Args:
        logits: Raw class scores produced by the network.
        labels: Integer class ids (0 or 1 in this project).

    Returns:
        Scalar float16 tensor: the mean top-1 accuracy over the batch.
    """
    with tf.variable_scope('accuracy') as scope:
        # True where the label is the highest-scoring class.
        correct = tf.nn.in_top_k(logits, labels, 1)
        correct = tf.cast(correct, tf.float16)
        accuracy = tf.reduce_mean(correct)
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    # The return must be indented into the function body; at column 0 it is
    # a SyntaxError ("'return' outside function").
    return accuracy

training.py

import os 

import numpy as np 

import tensorflow as tf 

import input_data    

import model

import time

 

# Training script: builds the input pipeline and the model, then runs
# MAX_STEP optimization steps, logging every 50 steps and checkpointing
# every 2000 steps.

N_CLASSES = 2
IMG_W = 208
IMG_H = 208
BATCH_SIZE = 16
CAPACITY = 2000  # capacity of the input batching queue
MAX_STEP = 8000
learning_rate = 0.0001  # keep this below 0.001

print("I'm OK")

train_dir = 'D:\\workspace4\\tensorflow\\train\\'  # folder with the training images
logs_train_dir = 'D:\\workspace4\\tensorflow\\save1\\'  # folder for summaries and checkpoints

train, train_label = input_data.get_files(train_dir)

train_batch, train_label_batch = input_data.get_batch(train,
                                                      train_label,
                                                      IMG_W,
                                                      IMG_H,
                                                      BATCH_SIZE,
                                                      CAPACITY)

# Graph definition for training.
sess = tf.Session()

train_logits = model.inference(train_batch, BATCH_SIZE, N_CLASSES)
train_loss = model.losses(train_logits, train_label_batch)
train_op = model.trainning(train_loss, learning_rate)
train_acc = model.evaluation(train_logits, train_label_batch)

summary_op = tf.summary.merge_all()

# Writer for the summary log and saver for the checkpoints.
train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
saver = tf.train.Saver()

sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
# The queue runners must be started, otherwise the input pipeline never
# produces a batch and the first sess.run blocks forever.
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

# Running sums over the current 50-step logging window.
tra_loss = .0
tra_acc = .0

try:
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start = time.perf_counter()
    for step in np.arange(MAX_STEP):
        if coord.should_stop():
            break

        log_step = (step + 1) % 50 == 0
        if log_step:
            # Fetch the summary in the same run as the training step so it
            # describes the batch that was actually trained on, and so that
            # no extra batch is pulled from the input queue just for logging.
            _, tra_loss_, tra_acc_, summary_str = sess.run(
                [train_op, train_loss, train_acc, summary_op])
        else:
            _, tra_loss_, tra_acc_ = sess.run([train_op, train_loss, train_acc])

        tra_loss = tra_loss + tra_loss_
        tra_acc = tra_acc + tra_acc_

        if log_step:
            end = time.perf_counter()
            print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (step+1, tra_loss/50, tra_acc * 100.0/50))
            # Seconds spent on the last 50 steps.
            print(str(end-start))
            tra_loss = .0
            tra_acc = .0
            train_writer.add_summary(summary_str, step)

            start = time.perf_counter()

        # Save a checkpoint every 2000 steps and after the final step.
        if step % 2000 == 0 or step == MAX_STEP-1:
            checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)

except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')

finally:
    coord.request_stop()

coord.join(threads)
# Flush pending summary events to disk before shutting down.
train_writer.close()
sess.close()

evaluateCatOrDog.py

#coding=utf-8 

import tensorflow as tf

from PIL import Image 

import matplotlib.pyplot as plt

import input_data

import numpy as np

import model

import os

 

#从训练集中选取一张图片

def get_one_image(train):
    """Pick a random image from a directory, display it and return its pixels.

    Args:
        train: Directory containing the image files.

    Returns:
        A numpy array of shape ``[208, 208, 3]`` with the RGB pixels of the
        selected image, resized to 208x208.

    Raises:
        ValueError: If the directory contains no entries.
    """
    files = os.listdir(train)
    n = len(files)
    if n == 0:
        raise ValueError('no images found in %r' % (train,))
    ind = np.random.randint(0, n)
    img_dir = os.path.join(train, files[ind])
    # The context manager closes the underlying file handle once the pixel
    # data has been copied into the array.
    with Image.open(img_dir) as image:
        plt.imshow(image)
        plt.show()
        # Force three channels: grayscale, palette or RGBA files would
        # otherwise not fit the [1, 208, 208, 3] shape the model expects.
        resized = image.convert('RGB').resize([208, 208])
        return np.array(resized)

 

 

def evaluate_one_image():
    """Classify one randomly chosen image with the latest saved checkpoint.

    Loads an image from the test folder, rebuilds the inference graph,
    restores the most recent checkpoint from the log folder and prints the
    probability of the predicted class.  If no checkpoint is found, a
    message is printed and no prediction is attempted.
    """
    train = 'D:\\workspace4\\tensorflow\\test1\\'

    # Pixel array of one randomly selected image.
    image_array = get_one_image(train)

    with tf.Graph().as_default():
        BATCH_SIZE = 1  # a single image is evaluated
        N_CLASSES = 2  # two outputs: probability of cat and of dog

        # The image enters the graph through this placeholder, so the value
        # passed in feed_dict below is what the network actually sees.
        x = tf.placeholder(tf.float32, shape=[208, 208, 3])

        # The training pipeline feeds raw float32 pixel values without
        # per-image standardization, so none is applied here either;
        # otherwise the network would see inputs on a different scale than
        # it was trained on.
        # Add the batch dimension: [208, 208, 3] -> [1, 208, 208, 3].
        image = tf.reshape(x, [1, 208, 208, 3])
        logit = model.inference(image, BATCH_SIZE, N_CLASSES)
        # inference returns raw scores; softmax turns them into probabilities.
        logit = tf.nn.softmax(logit)

        # Folder holding the trained checkpoints.
        logs_train_dir = 'D:\\workspace4\\tensorflow\\save1\\'
        saver = tf.train.Saver()

        with tf.Session() as sess:

            print("从指定的路径中加载模型。。。。")
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('模型加载失败,,,文件没有找到')
                # Without restored weights the variables are uninitialized,
                # so running the network would only raise an error.
                return

            # Checkpoint files are named 'model.ckpt-<step>'; basename copes
            # with the platform's path separators.
            global_step = os.path.basename(ckpt.model_checkpoint_path).split('-')[-1]
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('模型加载成功, 训练的步数为 %s' % global_step)

            # Run the network on the selected image.
            prediction = sess.run(logit, feed_dict={x: image_array})
            # Index of the most probable class: 0 = cat, 1 = dog.
            max_index = np.argmax(prediction)
            # prediction has shape [1, 2]; index a scalar for %-formatting.
            if max_index == 0:
                print('猫的概率 %.6f' % prediction[0, 0])
            else:
                print('狗的概率 %.6f' % prediction[0, 1])

# Run a single prediction only when this file is executed as a script, so
# that importing the module has no side effects.
if __name__ == '__main__':
    evaluate_one_image()

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值