1.制作数据集
(1)下载数据集。从网上下载kaggle猫狗分类的数据集,为缩短训练时间,选择2000张图片(猫狗各1000张)作为训练集,200张图片(猫狗各100张)作为测试集。在train文件夹选0-1999的猫和0-1999的狗作为训练集,选2000-2099的猫和2000-2099的狗作为测试集。
(2)调整图片的大小。图片大小不一,需要调整图片的大小,重新设定规格(224,224,3),从而在后续的网络模型输入时,保证输入到模型中的图片大小一致。
##### resize_data.py #####
import os
import cv2

# Resize every image under dir_test to 224x224 so all network inputs match.
# NOTE(review): dir_train is defined but unused here -- presumably the script
# was re-run with dir_train substituted below to resize the training set too.
dir_train = "/home/xiaobin/PycharmProjects/figure/train_1000"
dir_test = "/home/xiaobin/PycharmProjects/figure/test_200"

# os.walk() yields (root, dirs, files) for each directory it visits:
#   root  - path of the directory currently being walked
#   dirs  - names of its immediate subdirectories
#   files - names of its files (subdirectories are visited in later iterations)
for root, dirs, files in os.walk(dir_test):
    for file in files:
        filepath = os.path.join(root, file)
        image = cv2.imread(filepath)
        if image is None:  # skip unreadable/non-image files instead of crashing in resize
            continue
        dim = (224, 224)
        resized = cv2.resize(image, dim)
        path = "/home/xiaobin/PycharmProjects/figure/test/" + file
        cv2.imwrite(path, resized)
(3)制作标签文档。为让图片和标签匹配,制作训练集和测试集图片的索引文本。编写代码实现:
##### make_txt.py ########
# Build index files mapping image filename -> integer label (0 = cat, 1 = dog).
# train.txt lists cat.0..999 and dog.0..999; test.txt lists cat/dog 2000..2099.
# `with` guarantees the files are flushed and closed even if a write fails.
with open("train.txt", 'w') as f1:
    for i in range(1000):
        f1.write("cat.%d.jpg %d\n" % (i, 0))
    for j in range(1000):
        f1.write("dog.%d.jpg %d\n" % (j, 1))
with open("test.txt", "w") as f2:
    for i in range(100):
        f2.write("cat.%d.jpg %d\n" % (i + 2000, 0))
    for j in range(100):
        f2.write("dog.%d.jpg %d\n" % (j + 2000, 1))
2.搭建网络训练
# 1.导入一些模块
import cv2
import tensorflow as tf
import numpy as np # 用于数据格式转换
import os # 路径
from tensorflow.keras.preprocessing.image import ImageDataGenerator # 数据增强
from matplotlib import pyplot as plt
from tensorflow.keras.layers import Dense,Activation,Dropout,Conv2D,BatchNormalization,MaxPool2D,Flatten
# 设置GPU显存按需申请
gpu = tf.config.experimental.list_physical_devices(device_type='GPU')
assert len(gpu) == 1
tf.config.experimental.set_memory_growth(gpu[0], True)
# 2.路径和存储文件
train_path = './train_2000/' # 训练集图片路径
train_txt = 'train.txt' # 训练集标签文件
test_path = './test_200/' # 测试集图片路径
test_txt = 'test.txt' # 测试集标签文件
# 3.制作数据集的函数
def generateds(path, txt): # 图片路径,标签文件
f = open(txt, 'r') # 以只读的形式打开txt
contents = f.readlines() # 读取文件中所有的行,每行为一个单位
f.close()
x, y_ = [], []
for content in contents: # 逐行读出
value = content.split() # 以空格分开
img_path = path + value[0]
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # 转为RGB
img = img / 255.0 # 归一化,有利于网络吸收
x.append(img)
y_.append(value[1])
# print('load:' + content)
x = np.array(x)
y_ = np.array(y_)
y_ = y_.astype(np.int64)
return x, y_
# 4. Load both splits from disk via the label files.
print('----------------Generate Datasets--------------')
x_train, y_train = generateds(train_path, train_txt)
x_test, y_test = generateds(test_path, test_txt)
# 5. Shuffle each split with a random permutation so that cats and dogs
# (which the label files list in order) are interleaved during training.
train_perm = np.random.permutation(len(x_train))
x_train = x_train[train_perm]
y_train = y_train[train_perm]
test_perm = np.random.permutation(len(x_test))
x_test = x_test[test_perm]
y_test = y_test[test_perm]
# 6. Data augmentation: random transforms applied on the fly during training.
image_gen_train = ImageDataGenerator(
    rescale=1. / 1.,        # no-op here: generateds already scaled images to [0, 1]
    rotation_range=45,      # random rotation up to 45 degrees
    width_shift_range=.15,  # random horizontal shift (fraction of width)
    height_shift_range=.15, # random vertical shift (fraction of height)
    horizontal_flip=True,   # random horizontal flip
    zoom_range=0.5          # random zoom up to 50%
)
image_gen_train.fit(x_train)
# 7. Build the network: three Conv->BN->ReLU->MaxPool stages with the filter
# count doubling each stage (32, 64, 128), then a small dense classifier
# ending in a 2-way softmax (cat vs dog).
model = tf.keras.models.Sequential()
for n_filters in (32, 64, 128):
    model.add(Conv2D(filters=n_filters, kernel_size=(3, 3)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=2))
model.add(Flatten())  # flatten feature maps into a 1-D vector
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(2, activation='softmax'))
# 8. Configure training: Adam optimizer; sparse categorical cross-entropy
# matches the integer labels, and from_logits=False because the final layer
# already applies softmax.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])
# 9. Checkpointing: resume from a previous run if checkpoint files exist.
# NOTE(review): "cat_dag" looks like a typo for "cat_dog"; the inference
# script loads the same path, so any rename must be made in both scripts.
checkpoint_save_path = "./checkpoint/cat_dag.ckpt"
if os.path.exists(checkpoint_save_path + '.index'):
    print("--------------load model--------------")
    model.load_weights(checkpoint_save_path)
# Save weights only (not the full model), and only when validation improves.
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True)
# 10. Train on the augmented generator; validate on the (un-augmented)
# test split after every epoch.
history = model.fit(image_gen_train.flow(x_train, y_train, batch_size=32),
                    epochs=15, validation_data=(x_test, y_test), validation_freq=1,
                    callbacks=[cp_callback])
## Extract per-epoch accuracy and loss histories for the plots below.
acc = history.history['sparse_categorical_accuracy']
val_acc = history.history['val_sparse_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
# 11. Print a layer summary and dump every trainable variable (name, shape,
# values) to a text file. `with` guarantees the file is closed even if a
# write fails partway through.
model.summary()
with open('./weights.txt', 'w') as weights_file:
    for v in model.trainable_variables:
        weights_file.write(str(v.name) + '\n')
        weights_file.write(str(v.shape) + '\n')
        weights_file.write(str(v.numpy()) + '\n')
# 12. Plot training/validation accuracy (left) and loss (right) side by side.
curve_specs = [(1, acc, val_acc, 'Accuracy'), (2, loss, val_loss, 'Loss')]
for position, train_curve, val_curve, metric in curve_specs:
    plt.subplot(1, 2, position)
    plt.plot(train_curve, label='Training ' + metric)
    plt.plot(val_curve, label='Validation ' + metric)
    plt.title('Training and Validation ' + metric)
    plt.legend()
plt.show()
训练的正确率在72%左右,loss和acc图如下所示:
3.输入图片测试
import tensorflow as tf
from tensorflow.keras.layers import Dense,Activation,Dropout,Conv2D,BatchNormalization,MaxPool2D,Flatten
import cv2
# Enable on-demand GPU memory growth so TensorFlow does not reserve all VRAM.
# Iterating over the detected devices (instead of `assert len(gpu) == 1`)
# keeps the script runnable on CPU-only or multi-GPU machines as well.
gpu = tf.config.experimental.list_physical_devices(device_type='GPU')
for g in gpu:
    tf.config.experimental.set_memory_growth(g, True)
# 1. Rebuild the forward-pass architecture exactly as in training (the layer
# structure must match so the saved weights can be restored): three
# Conv->BN->ReLU->MaxPool stages (32, 64, 128 filters), then the dense
# classifier with a 2-way softmax.
model = tf.keras.models.Sequential()
for n_filters in (32, 64, 128):
    model.add(Conv2D(filters=n_filters, kernel_size=(3, 3)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=2))
model.add(Flatten())  # flatten feature maps into a 1-D vector
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(2, activation='softmax'))
# 2. Restore the trained weights (same checkpoint path the training script saved to).
model_save_path = "./checkpoint/cat_dag.ckpt"
model.load_weights(model_save_path)
# 3. Preprocess one image the same way as training: BGR->RGB, resize to the
# 224x224 network input, scale to [0, 1], then add a leading batch dimension.
img = cv2.imread("./train/dog.228.jpg")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; the net expects RGB
img = cv2.resize(img, (224, 224))
img = img / 255.0  # normalize, matching generateds in the training script
img = img[tf.newaxis, ...]  # (224, 224, 3) -> (1, 224, 224, 3)
# 4. Predict: `result` is a (1, 2) softmax distribution; argmax picks the class.
result = model.predict(img)
pred = tf.argmax(result, axis=1)
# Extract a plain Python int: `pred.numpy() == 0` would compare a length-1
# ndarray and yield an array, not a bool, so convert to a scalar first.
if int(pred.numpy()[0]) == 0:
    print('识别结果是:小猫')
else:
    print('识别结果是:小狗')