猫狗识别

最新推荐文章于 2024-06-11 08:31:46 发布

diluosixu

最新推荐文章于 2024-06-11 08:31:46 发布

阅读量264

点赞数

分类专栏：深度学习

本文链接：https://blog.csdn.net/u012763126/article/details/117327670

版权

深度学习专栏收录该内容

2 篇文章 0 订阅

订阅专栏

数据集

cat图片12500张，dog图片12500张，数据集格式如下：

效果

input_data.py

import os

import tensorflow as tf

from PIL import Image

import matplotlib.pyplot as plt

import numpy as np

import cv2

def get_files(file_dir):
    """Collect cat/dog image paths from a directory and shuffle them.

    Files are classified by the part of their name before the first '.',
    e.g. 'dog.9981.jpg' -> 'dog'. Cats are labelled 0 and dogs 1. Files
    whose prefix is neither 'cat' nor 'dog' are ignored instead of being
    silently counted as dogs.

    Args:
        file_dir: Path of the folder holding the images. A trailing path
            separator is optional because paths are built with
            os.path.join.

    Returns:
        (image_list, label_list): two lists of equal length, shuffled with
        the same random permutation. image_list holds file paths (str) and
        label_list holds the matching int labels.
    """
    cats = []
    dogs = []

    for filename in os.listdir(file_dir):
        prefix = filename.split(sep='.')[0]
        path = os.path.join(file_dir, filename)
        if prefix == 'cat':
            cats.append(path)
        elif prefix == 'dog':
            dogs.append(path)
        # Any other entry (e.g. Thumbs.db, sub-folders) is not a sample.

    print("There are %d cats\nThere are %d dogs" % (len(cats), len(dogs)))

    image_list = cats + dogs
    label_list = [0] * len(cats) + [1] * len(dogs)

    # Shuffle paths and labels together with one shared index permutation;
    # this keeps labels as ints without a string-array round trip.
    order = np.random.permutation(len(image_list))
    image_list = [image_list[i] for i in order]
    label_list = [label_list[i] for i in order]

    return image_list, label_list

# 生成相同大小的批次

def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build queue-fed image and label batch tensors (TensorFlow 1.x).

    Args:
        image: List of image file paths.
        label: List of integer labels, aligned with `image`.
        image_W: Target image width in pixels.
        image_H: Target image height in pixels.
        batch_size: Number of samples per batch.
        capacity: Capacity of the batching queue.

    Returns:
        (image_batch, label_batch): image_batch is a float32 tensor of
        shape [batch_size, image_H, image_W, 3]; label_batch is an int32
        tensor of shape [batch_size].
    """
    # Convert the Python lists to tensors the input queue can slice.
    path_tensor = tf.cast(image, tf.string)
    target_tensor = tf.cast(label, tf.int32)

    # One (path, label) pair is produced per dequeue.
    single_path, single_label = tf.train.slice_input_producer(
        [path_tensor, target_tensor])

    # The files are decoded as JPEG and forced to 3 channels; the decoded
    # image is uint8.
    encoded = tf.read_file(single_path)
    decoded = tf.image.decode_jpeg(encoded, channels=3)

    # Bring every image to the same size by nearest-neighbour resizing
    # (rather than cropping or padding).
    resized = tf.image.resize_images(
        decoded,
        [image_H, image_W],
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
        align_corners=False)

    # No per-image standardization is applied here, so an explicit cast to
    # float32 is needed for the convolution layers.
    pixels = tf.cast(resized, tf.float32)

    image_batch, label_batch = tf.train.batch(
        [pixels, single_label],
        batch_size=batch_size,
        num_threads=64,
        capacity=capacity)

    # Pin the static shape of the label batch to [batch_size].
    label_batch = tf.reshape(label_batch, [batch_size])

    return image_batch, label_batch

model.py

#coding=utf-8

import tensorflow as tf

# 结构

# conv1 卷积层 1

# pooling1_lrn 池化层 1

# conv2 卷积层 2

# pooling2_lrn 池化层 2

# local3 全连接层 1

# local4 全连接层 2

# softmax 全连接层 3

def inference(images, batch_size, n_classes):
    """Build the CNN forward pass and return un-normalised class scores.

    Layout: conv1 -> pool1 + lrn -> conv2 -> lrn + pool2 -> local3 (fully
    connected, 128) -> local4 (fully connected, 128) -> softmax_linear.

    Args:
        images: Input image batch fed to a 3x3x3 convolution, i.e. a 4-D
            float tensor with 3 channels.
        batch_size: Number of images per batch; used to flatten the
            feature maps before the fully connected layers.
        n_classes: Number of output classes.

    Returns:
        Logits tensor of shape [batch_size, n_classes]. No softmax is
        applied here.
    """
    # conv1: 16 kernels of 3x3 over 3 input channels, SAME padding, ReLU.
    with tf.variable_scope('conv1') as scope:
        kernel = _weight_variable([3, 3, 3, 16], 0.1)
        bias = _bias_variable(16)
        conv = tf.nn.conv2d(images, kernel, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, bias)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)

    # pool1: 3x3 max pooling with stride 2, then local response norm.
    with tf.variable_scope('pooling1_lrn'):
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0,
                          beta=0.75, name='norm1')

    # conv2: 16 kernels of 3x3 over 16 input channels, SAME padding, ReLU.
    with tf.variable_scope('conv2'):
        kernel = _weight_variable([3, 3, 16, 16], 0.1)
        bias = _bias_variable(16)
        conv = tf.nn.conv2d(norm1, kernel, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, bias)
        conv2 = tf.nn.relu(pre_activation, name='conv2')

    # pool2: local response norm first, then 3x3 max pooling. The stride
    # here is 1, so the spatial size is not reduced by this layer.
    with tf.variable_scope('pooling2_lrn'):
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0,
                          beta=0.75, name='norm2')
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1],
                               padding='SAME', name='pooling2')

    # local3: flatten each sample to one row, then 128 ReLU units.
    with tf.variable_scope('local3') as scope:
        flat = tf.reshape(pool2, shape=[batch_size, -1])
        flat_dim = flat.get_shape()[1].value
        fc_weights = _weight_variable([flat_dim, 128], 0.005)
        fc_bias = _bias_variable(128)
        local3 = tf.nn.relu(tf.matmul(flat, fc_weights) + fc_bias, name=scope.name)

    # local4: 128 ReLU units.
    with tf.variable_scope('local4'):
        fc_weights = _weight_variable([128, 128], 0.005)
        fc_bias = _bias_variable(128)
        local4 = tf.nn.relu(tf.matmul(local3, fc_weights) + fc_bias, name='local4')

    # softmax_linear: linear projection to one score per class.
    with tf.variable_scope('softmax_linear'):
        out_weights = _weight_variable([128, n_classes], 0.005, name='softmax_linear')
        out_bias = _bias_variable(n_classes)
        softmax_linear = tf.add(tf.matmul(local4, out_weights), out_bias,
                                name='softmax_linear')

    return softmax_linear


def _weight_variable(shape, stddev, name='weights'):
    """Create a float32 variable initialised from a truncated normal."""
    initial = tf.truncated_normal(shape=shape, stddev=stddev, dtype=tf.float32)
    return tf.Variable(initial, name=name, dtype=tf.float32)


def _bias_variable(size):
    """Create a float32 bias variable of length `size`, initialised to 0.1."""
    initial = tf.constant(value=0.1, dtype=tf.float32, shape=[size])
    return tf.Variable(initial, name='biases', dtype=tf.float32)

# -----------------------------------------------------------------------------

# loss计算

# 传入参数：logits，网络计算输出值。labels，真实值，在这里是0或者1

# 返回参数：loss，损失值

def losses(logits, labels):
    """Compute the mean sparse softmax cross-entropy loss.

    Args:
        logits: Un-normalised class scores produced by the network.
        labels: Integer class labels (0 or 1 in this project).

    Returns:
        Scalar loss tensor. The value is also registered as a scalar
        summary under '<scope>/loss'.
    """
    with tf.variable_scope('loss') as scope:
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=labels,
            name='xentropy_per_example')
        loss = tf.reduce_mean(per_example, name='loss')
        summary_tag = scope.name + '/loss'
        tf.summary.scalar(summary_tag, loss)
    return loss

# --------------------------------------------------------------------------

# loss损失值优化

# 输入参数：loss。learning_rate，学习速率。

# 返回参数：train_op，训练op，这个参数要输入sess.run中让模型去训练。

def trainning(loss, learning_rate):
    """Create the Adam training op that minimises `loss`.

    Args:
        loss: Scalar loss tensor to minimise.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The training op; run it in a session to perform one update step.
        Each run also increments a non-trainable 'global_step' variable.
    """
    with tf.name_scope('optimizer'):
        adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
        step_counter = tf.Variable(0, name='global_step', trainable=False)
        train_op = adam.minimize(loss, global_step=step_counter)
    return train_op

# -----------------------------------------------------------------------

# 评价/准确率计算

# 输入参数：logits，网络计算值。labels，标签，也就是真实值，在这里是0或者1。

# 返回参数：accuracy，当前step的平均准确率，也就是在这些batch中多少张图片被正确分类了。

def evaluation(logits, labels):
    """Compute the fraction of samples in the batch classified correctly.

    Args:
        logits: Un-normalised class scores produced by the network.
        labels: Integer class labels (0 or 1 in this project).

    Returns:
        Scalar float32 tensor holding the top-1 accuracy of the current
        batch. The value is also registered as a scalar summary under
        '<scope>/accuracy'.
    """
    with tf.variable_scope('accuracy') as scope:
        correct = tf.nn.in_top_k(logits, labels, 1)
        # Average in float32: float16 cannot represent every integer above
        # 2048, so the mean becomes inexact for large batches.
        correct = tf.cast(correct, tf.float32)
        accuracy = tf.reduce_mean(correct)
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy

training.py

import os

import numpy as np

import tensorflow as tf

import input_data

import model

import time

# Training script: builds the input pipeline and the model, then runs
# MAX_STEP optimisation steps, logging every LOG_INTERVAL steps and saving
# checkpoints to logs_train_dir.

N_CLASSES = 2
IMG_W = 208
IMG_H = 208
BATCH_SIZE = 16
CAPACITY = 2000  # capacity of the input queue
MAX_STEP = 8000
LOG_INTERVAL = 50  # steps between console/summary reports
learning_rate = 0.0001  # keep below 0.001

print("I'm OK")

train_dir = 'D:\\workspace4\\tensorflow\\train\\'  # folder with training images
logs_train_dir = 'D:\\workspace4\\tensorflow\\save1\\'  # folder for logs and checkpoints

train, train_label = input_data.get_files(train_dir)
train_batch, train_label_batch = input_data.get_batch(train,
                                                      train_label,
                                                      IMG_W,
                                                      IMG_H,
                                                      BATCH_SIZE,
                                                      CAPACITY)

# Training graph definition.
sess = tf.Session()
train_logits = model.inference(train_batch, BATCH_SIZE, N_CLASSES)
train_loss = model.losses(train_logits, train_label_batch)
train_op = model.trainning(train_loss, learning_rate)
train_acc = model.evaluation(train_logits, train_label_batch)

summary_op = tf.summary.merge_all()

# Writer for the log files.
train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
saver = tf.train.Saver()

sess.run(tf.global_variables_initializer())

# The queue runners must be started, otherwise the input queues stay empty
# and the first sess.run blocks forever.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

tra_loss = .0
tra_acc = .0

try:
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()
    for step in np.arange(MAX_STEP):
        if coord.should_stop():
            break

        report = (step + 1) % LOG_INTERVAL == 0
        if report:
            # Fetch the summary in the same run as the training step so it
            # describes the batch that was trained on and no extra batch is
            # pulled from the queue just for logging.
            _, tra_loss_, tra_acc_, summary_str = sess.run(
                [train_op, train_loss, train_acc, summary_op])
        else:
            _, tra_loss_, tra_acc_ = sess.run([train_op, train_loss, train_acc])

        # Optional debugging aid (disabled): print the predicted class of
        # every sample next to its label.
        #     logits_, label_ = sess.run([train_logits, train_label_batch])
        #     print([np.argmax(row) for row in logits_])
        #     print(label_)

        tra_loss = tra_loss + tra_loss_
        tra_acc = tra_acc + tra_acc_

        if report:
            end = time.perf_counter()
            print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (
                step + 1, tra_loss / LOG_INTERVAL, tra_acc * 100.0 / LOG_INTERVAL))
            print(str(end - start))
            tra_loss = .0
            tra_acc = .0
            train_writer.add_summary(summary_str, step)
            start = time.perf_counter()

        # Save a checkpoint every 2000 steps and at the last step.
        if step % 2000 == 0 or step == MAX_STEP - 1:
            checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)
except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')
finally:
    coord.request_stop()
    coord.join(threads)
    # Flush pending summaries to disk before the session goes away.
    train_writer.close()
    sess.close()

evaluateCatOrDog.py

#coding=utf-8

import tensorflow as tf

from PIL import Image

import matplotlib.pyplot as plt

import input_data

import numpy as np

import model

import os

#从训练集中选取一张图片

def get_one_image(train):
    """Pick a random image from a folder, display it and return its pixels.

    Args:
        train: Path of the folder to pick the image from.

    Returns:
        numpy array of shape (208, 208, 3) holding the resized RGB image.

    Raises:
        ValueError: If the folder contains no entries.
    """
    files = os.listdir(train)
    n = len(files)
    if n == 0:
        raise ValueError('no files found in %s' % train)

    ind = np.random.randint(0, n)
    img_dir = os.path.join(train, files[ind])

    # Force 3-channel RGB: grayscale, palette, RGBA or CMYK files would
    # otherwise produce an array that cannot be reshaped to
    # [1, 208, 208, 3] by the caller.
    image = Image.open(img_dir).convert('RGB')

    plt.imshow(image)
    plt.show()

    image = image.resize([208, 208])
    image = np.array(image)
    return image

def evaluate_one_image():
    """Classify one random test image with the latest saved checkpoint.

    Rebuilds the inference graph with batch size 1, restores the latest
    checkpoint from the hard-coded log folder and prints the probability
    of the predicted class. If no checkpoint is found, a message is
    printed and the function returns without predicting.
    """
    train = 'D:\\workspace4\\tensorflow\\test1\\'

    # Load one random image as a (208, 208, 3) array.
    image_array = get_one_image(train)

    with tf.Graph().as_default():
        BATCH_SIZE = 1  # a single image is evaluated
        N_CLASSES = 2  # two outputs: cat and dog probabilities

        # Feed the image through a placeholder that is actually wired into
        # the graph (previously the placeholder was fed but unused).
        x = tf.placeholder(tf.float32, shape=[208, 208, 3])

        # The training pipeline (input_data.get_batch) feeds float32 pixels
        # without per-image standardization, so the same preprocessing is
        # used here to avoid a train/inference mismatch.
        image = tf.reshape(x, [1, 208, 208, 3])

        logit = model.inference(image, BATCH_SIZE, N_CLASSES)
        # inference() returns raw scores; softmax turns them into
        # probabilities.
        logit = tf.nn.softmax(logit)

        # Folder where the trained model was saved.
        logs_train_dir = 'D:\\workspace4\\tensorflow\\save1\\'

        saver = tf.train.Saver()

        with tf.Session() as sess:
            print("从指定的路径中加载模型。。。。")
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('模型加载失败，，，文件没有找到')
                # Without restored weights the variables are uninitialised,
                # so running the prediction would only raise an error.
                return

            # basename handles the platform's path separator; the step
            # number is the suffix after the last '-'.
            global_step = os.path.basename(ckpt.model_checkpoint_path).split('-')[-1]
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('模型加载成功, 训练的步数为 %s' % global_step)

            prediction = sess.run(
                logit, feed_dict={x: image_array.astype(np.float32)})

            # Index of the most probable class; prediction has shape
            # [1, N_CLASSES], so scalar elements are formatted below.
            max_index = np.argmax(prediction)
            if max_index == 0:
                print('猫的概率 %.6f' % prediction[0, 0])
            else:
                print('狗的概率 %.6f' % prediction[0, 1])

# Test: run a single-image prediction when this script is executed.

evaluate_one_image()

diluosixu

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
猫狗识别

数据集：cat图片12500张，dog图片12500张，数据集格式如下： 效果 input_data.py import os import tensorflow as tf from PIL import Image import matplotlib.pyplot as plt import numpy as np import cv2 def get_files(file_dir): # file_dir: 文件夹路径...
复制链接

扫一扫