一、下载猫狗数据集
百度云链接如下
链接:https://pan.baidu.com/s/1KWYrGVVS6He7lO7skyhgQQ
提取码:p2dd
二、AlexNet实现
1、划分训练集和测试集
因为猫狗大战的测试集没有label,所以我们从训练集中随机抽取20%作为测试集,代码如下所示
divide.py
import os, random, shutil
def moveFile(fileDir, tarDir=None, rate=0.2):
    """Move a random sample of the entries in ``fileDir`` into ``tarDir``.

    Args:
        fileDir: Source directory holding the images.
        tarDir: Destination directory. When omitted, the module-level
            ``tarDir`` variable is used, which is how the original script
            passed the destination.
        rate: Fraction of the entries to move (0.2 moves 20%; the count is
            truncated towards zero).

    Raises:
        ValueError: If no destination is available or ``rate`` is outside
            the range [0, 1].
    """
    if tarDir is None:
        # Backward compatibility with callers that only set a global tarDir.
        tarDir = globals().get('tarDir')
        if tarDir is None:
            raise ValueError('no target directory given: pass tarDir or '
                             'define a module-level tarDir')
    if not 0 <= rate <= 1:
        raise ValueError('rate must be between 0 and 1, got %r' % (rate,))

    # Create the destination on demand so a missing folder does not turn the
    # first move into a rename of the file itself.
    os.makedirs(tarDir, exist_ok=True)

    pathDir = os.listdir(fileDir)  # names of all entries in the source folder
    picknumber = int(len(pathDir) * rate)  # how many entries to move
    sample = random.sample(pathDir, picknumber)  # random pick, no repeats
    print(sample)
    for name in sample:
        # os.path.join works with or without a trailing separator.
        shutil.move(os.path.join(fileDir, name), os.path.join(tarDir, name))
    return
if __name__ == '__main__':
    # Every backslash is escaped explicitly. The original source path had a
    # lone "\" before a non-ASCII character, which is an invalid escape
    # sequence (a warning today, an error in future Python versions). The
    # resulting string value is unchanged.
    fileDir = 'D:\\神经网络\\Alexnet\\猫狗数据集\\train\\'  # source image folder
    tarDir = 'D:\\神经网络\\Alexnet\\猫狗数据集\\test\\'  # folder the sample is moved to
    moveFile(fileDir)
按照代码将训练集抽出20%作为测试集,在我们的目录中要有源文件夹train,里面存放的是训练数据集,还要有一个空文件夹test,用来存放从训练集转移过来的测试集。这里用的是留出法(hold-out),即随机留出一部分数据专门用于测试,而不是交叉验证。
2、将训练集和测试集图片放缩为224x224
我刚开始训练的时候没有把训练集和测试集的图片统一缩放为224x224,导致测试时精度只有70%-80%。后来我寻找原因,把训练时送入网络的图片保存出来查看,发现大部分训练集图片在读取时是被裁剪(而不是缩放)为固定尺寸的,图片只被截取了一部分,比如说有的图片只剩一条猫腿,这样训练肯定是不行的。将图片预先缩放之后,精度得到了显著的提高。
reshape.py
import cv2
import os
def rebuild(file_dir, save_dir):
    """Resize every image in ``file_dir`` to 224x224 and save it to ``save_dir``.

    Files that OpenCV cannot decode or resize are printed and deleted from
    ``file_dir``, as the original script did. Sub-directories are skipped.

    Args:
        file_dir: Folder containing the source images.
        save_dir: Folder receiving the resized copies (created if missing).
    """
    print('Start to resize images...')
    # cv2.imwrite only reports failure through its return value, so make sure
    # the output folder exists up front.
    os.makedirs(save_dir, exist_ok=True)
    for file in os.listdir(file_dir):
        file_path = os.path.join(file_dir, file)
        if not os.path.isfile(file_path):
            continue

        # cv2.imread signals an unreadable file by returning None.
        image = cv2.imread(file_path)
        image_resized = None
        if image is not None:
            try:
                image_resized = cv2.resize(image, (224, 224))
            except cv2.error:
                image_resized = None

        if image_resized is None:
            # Unusable image: report it and drop it from the data set.
            print(file_path)
            os.remove(file_path)
            continue

        save_path = os.path.join(save_dir, file)
        if not cv2.imwrite(save_path, image_resized):
            print('Failed to write ' + save_path)
    print('Finished!')
rebuild('./train','./finaltrain/')
# './train' is the folder with the original training images; './finaltrain/' is where the resized images are written.
将训练集预处理之后,同样方法预处理测试集
3、AlexNet实现
input_data.py
import tensorflow as tf
import os
import numpy as np
def get_files(file_dir):
    """Collect the cat/dog image paths in ``file_dir`` in random order.

    A file is labelled from the part of its name before the first dot:
    names containing 'cat' get label 0, names containing 'dog' get label 1,
    and anything else is ignored.

    Args:
        file_dir: Folder containing the images (a trailing separator is
            optional).

    Returns:
        A tuple ``(image_list, label_list)``: a list of file paths and the
        list of matching integer labels, shuffled with the same permutation.
    """
    image_list = []
    label_list = []
    for file in os.listdir(file_dir):
        prefix = file.split(sep='.')[0]
        if 'cat' in prefix:
            label = 0
        elif 'dog' in prefix:
            label = 1
        else:
            continue
        image_list.append(os.path.join(file_dir, file))
        label_list.append(label)

    # Shuffle paths and labels together through one index permutation. This
    # keeps the labels as ints instead of pushing them through a string array.
    order = np.random.permutation(len(image_list))
    image_list = [image_list[i] for i in order]
    label_list = [label_list[i] for i in order]
    return image_list, label_list
# image_W / image_H: target image width / height; batch_size: images per
# batch; capacity: maximum number of elements held in the batching queue.
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build the TF1 queue-based input pipeline and return one batch of tensors.

    Args:
        image: List of JPEG file paths.
        label: List of integer labels, aligned with ``image``.
        image_W: Target image width in pixels.
        image_H: Target image height in pixels.
        batch_size: Number of examples per batch.
        capacity: Maximum number of elements in the batching queue.

    Returns:
        ``(image_batch, label_batch)``: a float32 image tensor and an int32
        label tensor reshaped to ``[batch_size]``.
    """
    # Convert the Python lists into tensors TensorFlow can slice.
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)
    # Queue that yields one (path, label) pair at a time.
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    # Read the raw file bytes.
    image_contents = tf.read_file(input_queue[0])
    # Decode as a 3-channel (RGB) image.
    image = tf.image.decode_jpeg(image_contents, channels=3)
    # Centre-crop or zero-pad to the target size. The API takes
    # (image, target_height, target_width), so height goes first; the
    # original call passed width first, which is wrong for non-square sizes.
    image = tf.image.resize_image_with_crop_or_pad(image, image_H, image_W)
    # Per-image standardization: subtract the mean and divide by the
    # (adjusted) standard deviation.
    image = tf.image.per_image_standardization(image)
    # Assemble batches; num_threads is the number of enqueueing threads.
    image_batch, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=64,
        capacity=capacity)
    # Give the label batch an explicit static shape.
    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch
def one_hot(labels, n_class=None):
    """Return the one-hot encoding of integer ``labels``.

    Args:
        labels: Sequence of non-negative integer class ids.
        n_class: Number of columns in the result. When omitted it is inferred
            as ``max(labels) + 1``, which is too narrow whenever the highest
            class is absent from ``labels`` (e.g. a batch of only cats), so
            pass it explicitly when the width must be fixed.

    Returns:
        A float array of shape ``(len(labels), n_class)`` with a single 1 per
        row.
    """
    labels = np.asarray(labels, dtype=np.int64)
    n_sample = len(labels)
    if n_class is None:
        # Empty input has no maximum; fall back to zero columns.
        n_class = int(labels.max()) + 1 if n_sample else 0
    onehot_labels = np.zeros((n_sample, n_class))
    onehot_labels[np.arange(n_sample), labels] = 1
    return onehot_labels
这个文件(input_data.py)的作用是获取训练用的一个batch。
AlexNet.py
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import time
#import creat_and_read_TFReacod as reader
import os
import input_data
# Expose only the first GPU to this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Session configuration with the allow_growth GPU option switched on.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.InteractiveSession(config=config)
# Folder holding the training images.
train_dir = 'D:\\神经网络\\Alexnet\\猫狗数据集\\train\\'
# Input pipeline: list the files, then build the batch tensors
# (227x227 images, 50 per batch, queue capacity 2048).
x_train, y_train = input_data.get_files(train_dir)
image_batch, label_batch = input_data.get_batch(
    x_train, y_train, image_W=227, image_H=227, batch_size=50, capacity=2048)
# Batch normalization. The author notes that training hardly converged
# without this normalization step.
def batch_norm(inputs, is_train, is_conv_out=True, decay=0.999):
    """Apply batch normalization to ``inputs`` along its last dimension.

    Args:
        inputs: Tensor to normalize; its last dimension sizes the parameters.
        is_train: Python bool fixed at graph-construction time. True
            normalizes with batch statistics and updates the moving averages;
            False normalizes with the stored moving averages.
        is_conv_out: True takes moments over axes [0, 1, 2] (convolution
            output); False takes them over axis [0] (fully connected output).
        decay: Decay factor of the moving averages.

    Returns:
        The normalized tensor.
    """
    param_shape = [inputs.get_shape()[-1]]
    # Variables are created in a fixed order: scale, beta, mean, variance.
    scale = tf.Variable(tf.ones(param_shape))
    beta = tf.Variable(tf.zeros(param_shape))
    pop_mean = tf.Variable(tf.zeros(param_shape), trainable=False)
    pop_var = tf.Variable(tf.ones(param_shape), trainable=False)

    if not is_train:
        # Inference: use the accumulated population statistics.
        return tf.nn.batch_normalization(inputs,
                                         pop_mean, pop_var, beta, scale, 0.001)

    moment_axes = [0, 1, 2] if is_conv_out else [0]
    batch_mean, batch_var = tf.nn.moments(inputs, moment_axes)
    train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
    train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
    # Evaluating the output also refreshes the moving averages.
    with tf.control_dependencies([train_mean, train_var]):
        return tf.nn.batch_normalization(inputs,
                                         batch_mean, batch_var, beta, scale, 0.001)
# Hyper-parameters
learning_rate = 1e-4    # optimizer learning rate
training_iters = 3000   # number of training steps
batch_size = 50         # examples per batch
display_step = 5        # print interval (original note: print every 5 steps)
# Two output classes; widths of the first and second fully connected layers.
n_classes, n_fc1, n_fc2 = 2, 4096, 2048
# Graph inputs: image batch and one-hot labels.
x = tf.placeholder(dtype=tf.float32, shape=[None, 227, 227, 3])
y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes])
# Per the author's note, the layer shapes follow the AlexNet paper.
# Weight spec: (layer name, weight shape, stddev of the truncated normal).
# The variables are created in list order.
_weight_specs = [
    ('conv1', [11, 11, 3, 96], 0.0001),
    ('conv2', [5, 5, 96, 256], 0.01),
    ('conv3', [3, 3, 256, 384], 0.01),
    ('conv4', [3, 3, 384, 384], 0.01),
    ('conv5', [3, 3, 384, 256], 0.01),
    ('fc1', [6 * 6 * 256, n_fc1], 0.1),
    ('fc2', [n_fc1, n_fc2], 0.1),
    ('fc3', [n_fc2, n_classes], 0.1),
]
W_conv = {
    layer: tf.Variable(tf.truncated_normal(shape, stddev=stddev))
    for layer, shape, stddev in _weight_specs
}
# Bias spec: (layer name, constant initial value, number of units).
_bias_specs = [
    ('conv1', 0.0, 96),
    ('conv2', 0.1, 256),
    ('conv3', 0.1, 384),
    ('conv4', 0.1, 384),
    ('conv5', 0.1, 256),
    ('fc1', 0.1, n_fc1),
    ('fc2', 0.1, n_fc2),
    ('fc3', 0.0, n_classes),
]
b_conv = {
    layer: tf.Variable(tf.constant(value, dtype=tf.float32, shape=[units]))
    for layer, value, units in _bias_specs
}
def _conv_bn_relu(inputs, layer, strides, padding):
    """Convolution + bias + batch norm (training mode) + ReLU for ``layer``."""
    net = tf.nn.conv2d(inputs, W_conv[layer], strides=strides, padding=padding)
    net = tf.nn.bias_add(net, b_conv[layer])
    net = batch_norm(net, True)
    return tf.nn.relu(net)


def _avg_pool_3x3(inputs):
    """3x3 average pooling with stride 2 and VALID padding."""
    return tf.nn.avg_pool(inputs, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')


def _fc_bn_relu(inputs, layer):
    """Fully connected layer + batch norm (training mode) + ReLU for ``layer``."""
    net = tf.add(tf.matmul(inputs, W_conv[layer]), b_conv[layer])
    net = batch_norm(net, True, False)
    return tf.nn.relu(net)


_UNIT_STRIDE = [1, 1, 1, 1]

# Make the input an explicit [batch, 227, 227, 3] tensor.
x_image = tf.reshape(x, [-1, 227, 227, 3])
# Convolution 1: 11x11 kernel, stride 4, no padding.
conv1 = _conv_bn_relu(x_image, 'conv1', [1, 4, 4, 1], 'VALID')
# Pooling 1
pool1 = _avg_pool_3x3(conv1)
# Local response normalization of pool1; conv2 below reads pool1, not norm1.
norm1 = tf.nn.lrn(pool1, 5, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
# Convolution 2
conv2 = _conv_bn_relu(pool1, 'conv2', _UNIT_STRIDE, 'SAME')
# Pooling 2
pool2 = _avg_pool_3x3(conv2)
# Convolutions 3-5
conv3 = _conv_bn_relu(pool2, 'conv3', _UNIT_STRIDE, 'SAME')
conv4 = _conv_bn_relu(conv3, 'conv4', _UNIT_STRIDE, 'SAME')
conv5 = _conv_bn_relu(conv4, 'conv5', _UNIT_STRIDE, 'SAME')
# Pooling 5, then flatten to one 6*6*256 vector per image.
pool5 = _avg_pool_3x3(conv5)
reshape = tf.reshape(pool5, [-1, 6 * 6 * 256])
# Fully connected layers 1 and 2
fc1 = _fc_bn_relu(reshape, 'fc1')
fc2 = _fc_bn_relu(fc1, 'fc2')
# Output logits
out = tf.add(tf.matmul(fc2, W_conv['fc3']), b_conv['fc3'])
# Loss: mean softmax cross-entropy, minimized with Adam.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=out))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
# Evaluation: fraction of rows whose arg-max logit matches the label.
correct_pred = tf.equal(tf.argmax(out, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
init = tf.global_variables_initializer()
# One-hot encoding of the integer labels, matching the [None, n_classes]
# label placeholder.
def onehot(labels, n_class=None):
    """Return the one-hot encoding of integer ``labels``.

    Args:
        labels: Sequence of non-negative integer class ids.
        n_class: Number of columns in the result. When omitted it is inferred
            as ``max(labels) + 1``, which is too narrow whenever the highest
            class is absent from ``labels`` (e.g. a batch of only cats), so
            pass it explicitly when the width must be fixed.

    Returns:
        A float array of shape ``(len(labels), n_class)`` with a single 1 per
        row.
    """
    labels = np.asarray(labels, dtype=np.int64)
    n_sample = len(labels)
    if n_class is None:
        # Empty input has no maximum; fall back to zero columns.
        n_class = int(labels.max()) + 1 if n_sample else 0
    onehot_labels = np.zeros((n_sample, n_class))
    onehot_labels[np.arange(n_sample), labels] = 1
    return onehot_labels
save_model = "./model/AlexNetModel.ckpt"  # checkpoint path prefix for the trained model


def train(opech):
    """Run ``opech`` training steps, save a checkpoint and plot the loss curve.

    Each step pulls one batch from the input queue, applies one optimizer
    update and prints the loss, accuracy and elapsed time of that step.
    After the loop the model is saved to ``save_model`` and the recorded
    losses are written to 'cat_and_dog_AlexNet.jpg'.

    Args:
        opech: Number of training steps to run.
    """
    c = []  # loss recorded at every step, plotted at the end
    with tf.Session() as sess:
        sess.run(init)
        train_writer = tf.summary.FileWriter(".//log", sess.graph)  # graph log output
        saver = tf.train.Saver()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        start_time = time.time()
        try:
            for i in range(opech):
                image, label = sess.run([image_batch, label_batch])
                # Index an identity matrix so the label width is always
                # n_classes, even when a batch holds only one class.
                labels = np.eye(n_classes, dtype=np.float32)[label]
                # A single run performs the update and fetches both metrics.
                # Separate runs would repeat the forward pass and refresh the
                # batch-norm moving averages once per run.
                _, loss_record, acc = sess.run(
                    [optimizer, loss, accuracy],
                    feed_dict={x: image, y: labels})
                print("now the loss is %f " % loss_record)
                print("now the accuracy is %f "%acc)
                c.append(loss_record)
                end_time = time.time()
                print('time: ', (end_time - start_time))
                start_time = end_time
                print("---------------%d onpech is finished-------------------" % i)
            print("Optimization Finished!")
            # Saving fails when the parent folder of the checkpoint is missing.
            os.makedirs(os.path.dirname(save_model), exist_ok=True)
            saver.save(sess, save_model)
            print("Model Save Finished!")
        finally:
            # Always stop the queue-runner threads and flush the log writer,
            # even when training is interrupted.
            coord.request_stop()
            coord.join(threads)
            train_writer.close()
    plt.plot(c)
    plt.xlabel('Iter')
    plt.ylabel('loss')
    plt.title('lr=%f, ti=%d, bs=%d' % (learning_rate, training_iters, batch_size))
    plt.tight_layout()
    plt.savefig('cat_and_dog_AlexNet.jpg', dpi=200)


train(training_iters)
训练时只需要更改训练集的地址即可
4、训练过程
训练结束后模型会保存在model文件夹中,我们就可以用训练好的模型进行测试啦。
5、模型测试
import cv2
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import input_data
from PIL import Image
# Expose only the first GPU to this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Session configuration with the allow_growth GPU option switched on.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.InteractiveSession(config=config)
# Folder holding the test images.
testfile = 'D:\\神经网络\\Alexnet\\猫狗数据集\\test\\'
def batch_norm(inputs, is_train, is_conv_out=True, decay=0.999):
    """Apply batch normalization to ``inputs`` along its last dimension.

    Args:
        inputs: Tensor to normalize; its last dimension sizes the parameters.
        is_train: Python bool fixed at graph-construction time. True
            normalizes with batch statistics and updates the moving averages;
            False normalizes with the stored moving averages.
        is_conv_out: True takes moments over axes [0, 1, 2] (convolution
            output); False takes them over axis [0] (fully connected output).
        decay: Decay factor of the moving averages.

    Returns:
        The normalized tensor.
    """
    param_shape = [inputs.get_shape()[-1]]
    # Variables are created in a fixed order: scale, beta, mean, variance.
    scale = tf.Variable(tf.ones(param_shape))
    beta = tf.Variable(tf.zeros(param_shape))
    pop_mean = tf.Variable(tf.zeros(param_shape), trainable=False)
    pop_var = tf.Variable(tf.ones(param_shape), trainable=False)

    if not is_train:
        # Inference: use the accumulated population statistics.
        return tf.nn.batch_normalization(inputs,
                                         pop_mean, pop_var, beta, scale, 0.001)

    moment_axes = [0, 1, 2] if is_conv_out else [0]
    batch_mean, batch_var = tf.nn.moments(inputs, moment_axes)
    train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
    train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
    # Evaluating the output also refreshes the moving averages.
    with tf.control_dependencies([train_mean, train_var]):
        return tf.nn.batch_normalization(inputs,
                                         batch_mean, batch_var, beta, scale, 0.001)
# Hyper-parameters. Only n_classes, n_fc1 and n_fc2 are read by the graph
# construction visible below.
learning_rate = 1e-4
training_iters = 200
batch_size = 50
display_step = 5
n_classes, n_fc1, n_fc2 = 2, 4096, 2048
# Graph inputs: image batch and one-hot labels.
x = tf.placeholder(dtype=tf.float32, shape=[None, 227, 227, 3])
y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes])
# Weight spec: (layer name, weight shape, stddev of the truncated normal).
# The variables are created in list order.
_weight_specs = [
    ('conv1', [11, 11, 3, 96], 0.0001),
    ('conv2', [5, 5, 96, 256], 0.01),
    ('conv3', [3, 3, 256, 384], 0.01),
    ('conv4', [3, 3, 384, 384], 0.01),
    ('conv5', [3, 3, 384, 256], 0.01),
    ('fc1', [6 * 6 * 256, n_fc1], 0.1),
    ('fc2', [n_fc1, n_fc2], 0.1),
    ('fc3', [n_fc2, n_classes], 0.1),
]
W_conv = {
    layer: tf.Variable(tf.truncated_normal(shape, stddev=stddev))
    for layer, shape, stddev in _weight_specs
}
# Bias spec: (layer name, constant initial value, number of units).
_bias_specs = [
    ('conv1', 0.0, 96),
    ('conv2', 0.1, 256),
    ('conv3', 0.1, 384),
    ('conv4', 0.1, 384),
    ('conv5', 0.1, 256),
    ('fc1', 0.1, n_fc1),
    ('fc2', 0.1, n_fc2),
    ('fc3', 0.0, n_classes),
]
b_conv = {
    layer: tf.Variable(tf.constant(value, dtype=tf.float32, shape=[units]))
    for layer, value, units in _bias_specs
}
# Evaluation must normalize with the stored moving averages. In training mode
# batch_norm uses the statistics of the current batch: with the single-image
# batches fed during evaluation the fully connected layers see zero variance
# over the batch axis, and every forward pass would also overwrite the stored
# averages.
_IS_TRAINING = False
_UNIT_STRIDE = [1, 1, 1, 1]


def _conv_bn_relu(inputs, layer, strides, padding):
    """Convolution + bias + batch norm (inference mode) + ReLU for ``layer``."""
    net = tf.nn.conv2d(inputs, W_conv[layer], strides=strides, padding=padding)
    net = tf.nn.bias_add(net, b_conv[layer])
    net = batch_norm(net, _IS_TRAINING)
    return tf.nn.relu(net)


def _avg_pool_3x3(inputs):
    """3x3 average pooling with stride 2 and VALID padding."""
    return tf.nn.avg_pool(inputs, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')


def _fc_bn_relu(inputs, layer):
    """Fully connected layer + batch norm (inference mode) + ReLU for ``layer``."""
    net = tf.add(tf.matmul(inputs, W_conv[layer]), b_conv[layer])
    net = batch_norm(net, _IS_TRAINING, False)
    return tf.nn.relu(net)


# The layers are built in the same order as in the training script so that
# the automatically numbered variable names match the checkpoint.
x_image = tf.reshape(x, [-1, 227, 227, 3])
# Convolution 1: 11x11 kernel, stride 4, no padding.
conv1 = _conv_bn_relu(x_image, 'conv1', [1, 4, 4, 1], 'VALID')
# Pooling 1
pool1 = _avg_pool_3x3(conv1)
# Local response normalization of pool1; conv2 below reads pool1, not norm1.
norm1 = tf.nn.lrn(pool1, 5, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
# Convolution 2
conv2 = _conv_bn_relu(pool1, 'conv2', _UNIT_STRIDE, 'SAME')
# Pooling 2
pool2 = _avg_pool_3x3(conv2)
# Convolutions 3-5
conv3 = _conv_bn_relu(pool2, 'conv3', _UNIT_STRIDE, 'SAME')
conv4 = _conv_bn_relu(conv3, 'conv4', _UNIT_STRIDE, 'SAME')
conv5 = _conv_bn_relu(conv4, 'conv5', _UNIT_STRIDE, 'SAME')
# Pooling 5, then flatten to one 6*6*256 vector per image.
pool5 = _avg_pool_3x3(conv5)
reshape = tf.reshape(pool5, [-1, 6 * 6 * 256])
# Fully connected layers 1 and 2
fc1 = _fc_bn_relu(reshape, 'fc1')
fc2 = _fc_bn_relu(fc1, 'fc2')
# Output logits
out = tf.add(tf.matmul(fc2, W_conv['fc3']), b_conv['fc3'])
# Saver used to restore the trained parameters.
saver = tf.train.Saver()
_RESIZE_TO = 224   # size the images are scaled to before being fed
_INPUT_SIZE = 227  # spatial size expected by the placeholder x


def _load_image(imagefile):
    """Load one image as a standardized float32 array of shape [1, 227, 227, 3]."""
    with Image.open(imagefile) as img:
        # Force three channels so grayscale / RGBA files do not break the shape.
        resized = img.convert('RGB').resize((_RESIZE_TO, _RESIZE_TO))
    pixels = np.asarray(resized, dtype=np.float32)
    # Zero-pad around the centre up to the network input size, the way
    # tf.image.resize_image_with_crop_or_pad pads a smaller image.
    before = (_INPUT_SIZE - _RESIZE_TO) // 2
    after = _INPUT_SIZE - _RESIZE_TO - before
    pixels = np.pad(pixels, ((before, after), (before, after), (0, 0)),
                    mode='constant')
    # Same formula as tf.image.per_image_standardization: subtract the mean
    # and divide by the standard deviation, bounded below by 1/sqrt(N).
    adjusted_std = max(float(pixels.std()), 1.0 / np.sqrt(pixels.size))
    pixels = (pixels - pixels.mean()) / adjusted_std
    return pixels.reshape(1, _INPUT_SIZE, _INPUT_SIZE, 3).astype(np.float32)


# The former "with tf.device('/gpu:0')" wrapper is dropped: the network ops
# are created at module level outside it, and Evaluate no longer creates ops.
def Evaluate(testfile, model_path='./model/AlexNetModel.ckpt'):
    """Classify every image under ``testfile`` and print the accuracy.

    The true class is taken from the file-name prefix before the first dot
    ('cat' or 'dog'); output index 0 means cat and index 1 means dog.

    Args:
        testfile: Root folder of the test images (walked recursively).
        model_path: Checkpoint to restore. The default is the path the
            training script saves to; change it if the checkpoint lives
            elsewhere.
    """
    count = 0  # correctly classified images
    sums = 0   # images evaluated
    start_time = time.time()
    with tf.Session() as sess:
        saver.restore(sess, model_path)  # load the trained parameters
        for root, sub_folders, files in os.walk(testfile):
            for name in files:
                imagefile = os.path.join(root, name)
                print(imagefile)
                try:
                    # Preprocess in NumPy: creating TF ops here would add new
                    # nodes to the graph for every single image.
                    image = _load_image(imagefile)
                except OSError:
                    print('skip unreadable file')
                    continue
                sums += 1
                prediction = sess.run(out, feed_dict={x: image})
                end_time = time.time()
                print('time: ', (end_time - start_time))
                start_time = end_time
                max_index = np.argmax(prediction)
                if max_index == 0:
                    print("猫")
                else:
                    print("狗")
                label = name.split('.')[0]
                if (max_index == 0 and label == 'cat') or (max_index == 1 and label == 'dog'):
                    count += 1
                print(" The accuracy is: ", count, sums)
    if sums:
        # Guard against an empty test folder (division by zero).
        print(" The accuracy is: ", count / sums)
    print(" The accuracy is: ", count, sums)


Evaluate(testfile)
三、总结
我们首先要做的就是划分数据集:先运行divide.py文件,从训练集中抽取20%作为测试集;运行结束之后,运行reshape.py文件,将训练集和测试集的图片都缩放为224x224;然后运行AlexNet.py文件进行训练,注意要修改训练集的地址。训练完成以后模型保存在model文件夹中,我们运行evaluate.py文件(即上面第5步的测试代码)进行模型测试,注意要修改测试集和模型所在的地址。
各位同学如果有不懂的地方可以评论或者私信我,我都会一一解答,有错误的地方还请大佬指正