猫狗大战背景介绍
猫狗大战数据集来源于Kaggle上的一个竞赛:Dogs vs. Cats,其中训练集包含12500张猫的图片和12500张狗的图片,数据集下载地址如下:
- http://www.kaggle.com/c/dogs-vs-cats
使用Finetuning对VGGNet进行调整,从而针对猫狗大战的训练集进行训练,创建工程文件,所有素材如下
代码示例
step1 对模型的修改
首先是对模型的修改(VGG16_model.py文件)。原先的输出结果是对1000个不同的类别进行判定,而在此只需要对2个类别(也就是猫和狗)进行判断,因此第一步就是修改输出层全连接层的输出通道数
def fc_layers(self):
    """Stack the three fully connected layers on top of ``self.pool5``.

    The two 4096-unit layers are created with ``trainable=False``; the final
    layer has 2 output channels (cat / dog) and keeps ``fc``'s default
    ``trainable=True``. The results are stored on ``self.fc6``, ``self.fc7``
    and ``self.fc8``.
    """
    # Changed for fine-tuning: the first two layers are frozen.
    self.fc6 = self.fc(name="fc1", input_data=self.pool5,
                       out_channel=4096, trainable=False)
    self.fc7 = self.fc(name="fc2", input_data=self.fc6,
                       out_channel=4096, trainable=False)
    # NOTE(review): ``fc`` applies ReLU to its result, so this 2-way output
    # is rectified as well -- confirm that is intended for the final layer.
    self.fc8 = self.fc(name="fc3", input_data=self.fc7, out_channel=2)
这里是最后一层的输出通道被设置成2,而对于其他部分,定义创建卷积层和全连接层的方法则无需做出太大改动。
def conv(self, name, input_data, out_channel):
    """Build a frozen 3x3 convolution + bias + ReLU layer.

    Args:
        name: Variable scope of the layer; also used as the name of the
            ReLU op.
        input_data: Input tensor; its last dimension is used as the number
            of input channels.
        out_channel: Number of output channels (filters).

    Returns:
        The ReLU-activated output tensor.
    """
    n_in = input_data.get_shape()[-1]
    with tf.variable_scope(name):
        # Changed for fine-tuning: both variables are non-trainable.
        filters = tf.get_variable(
            name="weights",
            shape=[3, 3, n_in, out_channel],
            dtype=tf.float32,
            trainable=False,
        )
        offsets = tf.get_variable(
            name="biases",
            shape=[out_channel],
            dtype=tf.float32,
            trainable=False,
        )
        convolved = tf.nn.conv2d(input_data, filters, [1, 1, 1, 1], padding="SAME")
        activated = tf.nn.relu(tf.nn.bias_add(convolved, offsets), name=name)
    # Keep track of the variables on the model.
    self.parameters += [filters, offsets]
    return activated
def fc(self, name, input_data, out_channel, trainable=True):
    """Build a fully connected layer followed by ReLU.

    Args:
        name: Variable scope under which "weights" and "biases" are created.
        input_data: Input tensor. A rank-4 input is flattened over its last
            three dimensions; otherwise its second dimension is used as the
            feature size.
        out_channel: Number of output units.
        trainable: Whether the layer's variables are trainable.

    Returns:
        The ReLU-activated output tensor.
    """
    dims = input_data.get_shape().as_list()
    size = dims[-1] * dims[-2] * dims[-3] if len(dims) == 4 else dims[1]
    flattened = tf.reshape(input_data, [-1, size])
    with tf.variable_scope(name):
        # Changed for fine-tuning: trainability is chosen by the caller.
        weights = tf.get_variable(
            name="weights",
            shape=[size, out_channel],
            dtype=tf.float32,
            trainable=trainable,
        )
        biases = tf.get_variable(
            name="biases",
            shape=[out_channel],
            dtype=tf.float32,
            trainable=trainable,
        )
        projected = tf.nn.bias_add(tf.matmul(flattened, weights), biases)
        out = tf.nn.relu(projected)
    # Keep track of the variables on the model.
    self.parameters += [weights, biases]
    return out
step2 数据的输入
对于修改后的模型,需要对其进行重新训练,而首要条件就是数据输入,在这里笔者使用数据的输入流方式。代码如下
def get_file(file_dir):
    """Collect image paths under ``file_dir`` and label them by folder name.

    Every file found in a sub-folder of ``file_dir`` is labelled from the
    name of the directory that directly contains it: ``'cat'`` gives 0, any
    other folder name gives 1. Files lying directly in ``file_dir`` belong
    to no class folder and are skipped.

    Args:
        file_dir: Root directory containing one sub-folder per class.

    Returns:
        A tuple ``(image_list, label_list)`` of equal length, shuffled with
        the same random order: file paths (str) and integer labels.
    """
    images = []
    labels = []
    for depth, (root, _sub_folders, files) in enumerate(os.walk(file_dir)):
        if depth == 0:
            # os.walk yields ``file_dir`` itself first; it has no class.
            continue
        # basename() handles both '/' and the platform separator, and the
        # label is attached per file so paths and labels cannot drift apart.
        label = 0 if os.path.basename(root) == 'cat' else 1
        for name in files:
            images.append(os.path.join(root, name))
            labels.append(label)

    # Shuffle paths and labels together through one index permutation.
    order = np.arange(len(images))
    np.random.shuffle(order)
    image_list = [images[k] for k in order]
    label_list = [labels[k] for k in order]
    return image_list, label_list
这里定义的get_file函数对输入文件夹中的文件进行分类:以不同的子文件夹作为分类标准将图片分为2类,并使用2个列表分别存储图片地址和对应的标签。同时我们需要按照程序的要求,将train文件夹中的图片分别放入cat和dog文件夹,如图所示:
def get_batch(image_list, label_list, img_width, img_height, batch_size, capacity):
    """Build a queue-based input pipeline that yields image/label batches.

    Args:
        image_list: List of image file paths.
        label_list: List of integer labels, parallel to ``image_list``.
        img_width: Target image width after crop/pad.
        img_height: Target image height after crop/pad.
        batch_size: Number of examples per batch.
        capacity: Capacity passed to ``tf.train.batch``.

    Returns:
        A tuple ``(image_batch, label_batch)`` of tensors; ``label_batch``
        is reshaped to ``[batch_size]``.
    """
    image = tf.cast(image_list, tf.string)
    label = tf.cast(label_list, tf.int32)
    input_queue = tf.train.slice_input_producer([image, label])

    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)
    # The API expects (target_height, target_width); the original call
    # passed width first, which only worked for square targets.
    image = tf.image.resize_image_with_crop_or_pad(image, img_height, img_width)
    image = tf.image.per_image_standardization(image)  # standardize each image

    image_batch, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=64,
        capacity=capacity,
    )
    label_batch = tf.reshape(label_batch, [batch_size])
    return image_batch, label_batch
get_batch函数通过读取列表中的地址,循环载入数量由参数batch_size决定的图片,并读取相应的图片标签作为数据标签一同进行训练,完整定义如下:
import tensorflow as tf
import numpy as np
import os
img_width = 224
img_height = 224
def get_file(file_dir):
    """Collect image paths under ``file_dir`` and label them by folder name.

    Every file found in a sub-folder of ``file_dir`` is labelled from the
    name of the directory that directly contains it: ``'cat'`` gives 0, any
    other folder name gives 1. Files lying directly in ``file_dir`` belong
    to no class folder and are skipped.

    Args:
        file_dir: Root directory containing one sub-folder per class.

    Returns:
        A tuple ``(image_list, label_list)`` of equal length, shuffled with
        the same random order: file paths (str) and integer labels.
    """
    images = []
    labels = []
    for depth, (root, _sub_folders, files) in enumerate(os.walk(file_dir)):
        if depth == 0:
            # os.walk yields ``file_dir`` itself first; it has no class.
            continue
        # basename() handles both '/' and the platform separator, and the
        # label is attached per file so paths and labels cannot drift apart.
        label = 0 if os.path.basename(root) == 'cat' else 1
        for name in files:
            images.append(os.path.join(root, name))
            labels.append(label)

    # Shuffle paths and labels together through one index permutation.
    order = np.arange(len(images))
    np.random.shuffle(order)
    image_list = [images[k] for k in order]
    label_list = [labels[k] for k in order]
    return image_list, label_list
def get_batch(image_list, label_list, img_width, img_height, batch_size, capacity):
    """Build a queue-based input pipeline that yields image/label batches.

    Args:
        image_list: List of image file paths.
        label_list: List of integer labels, parallel to ``image_list``.
        img_width: Target image width after crop/pad.
        img_height: Target image height after crop/pad.
        batch_size: Number of examples per batch.
        capacity: Capacity passed to ``tf.train.batch``.

    Returns:
        A tuple ``(image_batch, label_batch)`` of tensors; ``label_batch``
        is reshaped to ``[batch_size]``.
    """
    image = tf.cast(image_list, tf.string)
    label = tf.cast(label_list, tf.int32)
    input_queue = tf.train.slice_input_producer([image, label])

    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)
    # The API expects (target_height, target_width); the original call
    # passed width first, which only worked for square targets.
    image = tf.image.resize_image_with_crop_or_pad(image, img_height, img_width)
    image = tf.image.per_image_standardization(image)  # standardize each image

    image_batch, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=64,
        capacity=capacity,
    )
    label_batch = tf.reshape(label_batch, [batch_size])
    return image_batch, label_batch
def onehot(labels, n_class=None):
    """Convert integer class labels into a one-hot matrix.

    Args:
        labels: 1-D sequence of non-negative integer class indices.
        n_class: Number of columns of the result. When omitted it is
            inferred as ``max(labels) + 1``; pass it explicitly when a batch
            may not contain the highest class, so the width stays fixed.

    Returns:
        A float array of shape ``(len(labels), n_class)`` holding a single 1
        per row. Empty ``labels`` gives an array with zero rows.
    """
    labels = np.asarray(labels, dtype=int)
    n_sample = labels.shape[0]
    if n_class is None:
        # max() of an empty sequence raises, so fall back to zero classes.
        n_class = int(labels.max()) + 1 if n_sample else 0
    onehot_labels = np.zeros((n_sample, n_class))
    if n_sample:
        onehot_labels[np.arange(n_sample), labels] = 1
    return onehot_labels
step3 模型的重新训练与存储
Finetuning最重要的一个步骤就是模型的重新训练与存储。首先对于模型的值的输出,在类中已经做了定义,因此只需要将定义的模型类初始化后输出赋予一个特定的变量即可
# Instantiate the modified model on the image placeholder.
vgg = model.vgg16(x_imgs)
fc3_cat_and_dog = vgg.probs
# Mean softmax cross-entropy over the batch.
# NOTE(review): ``vgg.probs`` is passed as ``logits``; confirm it is not
# already a softmax output.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
        logits=fc3_cat_and_dog,
        labels=y_imgs,
    )
)
# Plain gradient descent with a learning rate of 0.001.
optimizer = tf.train.GradientDescentOptimizer(
    learning_rate=0.001
).minimize(loss)
这里同时定义了损失函数以及最小化方法,完整代码如下:
import numpy as np
import tensorflow as tf
import VGG16_model as model
import create_and_read_TFRecord2 as reader2
if __name__ == '__main__':
    # Fine-tune the modified VGG16 on ./train/ for 200 steps, then save the
    # model under ./model/.
    import time

    num_classes = 2  # cat / dog

    X_train, y_train = reader2.get_file("./train/")
    image_batch, label_batch = reader2.get_batch(X_train, y_train, 224, 224, 25, 256)

    x_imgs = tf.placeholder(tf.float32, [None, 224, 224, 3])
    y_imgs = tf.placeholder(tf.int32, [None, num_classes])

    vgg = model.vgg16(x_imgs)
    fc3_cat_and_dog = vgg.probs
    # NOTE(review): ``vgg.probs`` is passed as ``logits``; confirm it is not
    # already a softmax output.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=fc3_cat_and_dog, labels=y_imgs))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    vgg.load_weights('./vgg16_weights.npz', sess)
    saver = vgg.saver()

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)
    try:
        start_time = time.time()
        for i in range(200):
            image, label = sess.run([image_batch, label_batch])
            # Encode with a fixed width: inferring the class count from the
            # batch would give a single column for an all-cat batch and no
            # longer match the [None, 2] placeholder.
            labels = np.eye(num_classes, dtype=np.int32)[label]
            # One run performs the update and fetches the loss of this step,
            # instead of a second forward pass just for reporting.
            _, loss_record = sess.run(
                [optimizer, loss], feed_dict={x_imgs: image, y_imgs: labels})
            print("now the loss is %f " % loss_record)
            end_time = time.time()
            print('time: ', (end_time - start_time))
            start_time = end_time
            print("----------epoch %d is finished---------------" % i)
        saver.save(sess, "./model/")
        print("Optimization Finished!")
    finally:
        # Stop the input queue threads and release the session even when
        # training is interrupted.
        coord.request_stop()
        coord.join(threads)
        sess.close()
在训练函数中使用了TensorFlow的队列方式进行数据输入,而权重的重新载入也使用了与前面文章类似的方式,最终进行200次迭代,并将模型存储在model文件夹中。
step4 模型的复用
import tensorflow as tf
from scipy.misc import imread, imresize
import VGG16_model as model
# Restore the fine-tuned model and classify every image under ./test/.
import os

import numpy as np

imgs = tf.placeholder(tf.float32, [None, 224, 224, 3])
sess = tf.Session()
vgg = model.vgg16(imgs)
fc3_cat_and_dog = vgg.probs
saver = vgg.saver()
saver.restore(sess, './model/')

for root, sub_folders, files in os.walk('./test/'):
    # Counters restart for every directory visited by os.walk.
    i = 0
    cat = 0
    dog = 0
    for name in files:
        filepath = os.path.join(root, name)
        try:
            img1 = imread(filepath, mode='RGB')
            img1 = imresize(img1, (224, 224))
        except Exception as err:
            # Skip unreadable files; falling through would classify the
            # previous image again (or fail on an undefined ``img1``).
            print("remove", filepath, err)
            continue
        # Only successfully loaded images are counted.
        i += 1
        prob = sess.run(fc3_cat_and_dog, feed_dict={vgg.imgs: [img1]})
        max_index = np.argmax(prob)
        if max_index == 0:
            cat += 1
        else:
            dog += 1
        if i % 50 == 0:
            # Fraction of the images seen so far that were predicted as cat.
            acc = (cat * 1.) / (dog + cat)
            print(acc)
            print("-----------img number is %d------------" % i)

sess.close()