CNN图像分类-keras
基于Keras实现经典的CIFAR10图像分类卷积网络,并使用本地图片训练图像识别模型。
数据结构:
我的图像数据放在data文件夹下的img文件夹中,img文件夹下包含若干子文件夹,如图:
子文件夹的名称即为图像标签,同一标签的图像放在同一个子文件夹中,我的图像类别共有5个。
代码实现:
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator,load_img, img_to_array
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import keras
import os
# Path to the image root folder; the name of each sub-folder is used as the
# label of the images stored inside it.
folder_path = './data/img/'
# Collect the paths of all image files together with their labels
def get_images_and_labels(folder_path):
    """Recursively collect image file paths and their labels.

    Walks ``folder_path`` and keeps every file whose name ends (case
    insensitively) with ``.jpg``, ``.png``, ``.jpeg`` or ``.bmp``. The label
    of an image is the name of the folder that directly contains it. Files
    placed directly in ``folder_path`` therefore get the base name of
    ``folder_path`` itself, which is an empty string when the path ends with
    a separator.

    Directories and files are visited in sorted order so that the result does
    not depend on the file system's listing order; this keeps a later seeded
    train/test split reproducible.

    Args:
        folder_path: Root folder of the image data set.

    Returns:
        A tuple ``(images, labels)`` of two equally long lists: the image
        file paths and the matching label strings. Both are empty when the
        folder does not exist or contains no image files.
    """
    image_extensions = ('.jpg', '.png', '.jpeg', '.bmp')
    images = []
    labels = []
    for root, dirs, files in os.walk(folder_path):
        # Sorting in place makes os.walk descend into sub-folders in a
        # deterministic order.
        dirs.sort()
        # The sub-folder name is the label of every image inside it
        label = os.path.basename(root)
        for file in sorted(files):
            if file.lower().endswith(image_extensions):
                images.append(os.path.join(root, file))
                labels.append(label)
    return images, labels
img_files, labels = get_images_and_labels(folder_path)
if not img_files:
    # Fail early with a clear message instead of an opaque error raised
    # later by train_test_split on an empty data set.
    raise FileNotFoundError(f'No image files found under {folder_path!r}')

# The labels are folder names (strings), but to_categorical needs integer
# class ids. Map every distinct label to an index; sorting keeps the mapping
# stable between runs.
class_names = sorted(set(labels))
class_to_index = {name: index for index, name in enumerate(class_names)}
label_ids = [class_to_index[name] for name in labels]
print('Class indices:', class_to_index)

# Split the data into a training set and a test set
x_train, x_test, y_train, y_test = train_test_split(
    img_files, label_ids, test_size=0.2, random_state=42)
print(len(x_train))  # number of training samples

# Convert the label vectors into binary (one-hot) matrices.
# The number of classes is derived from the data instead of being hard-coded.
num_classes = len(class_names)
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
print(y_train.shape, 'ytrain')

# Load every image resized to 32x32 into a float32 NumPy array
image_size = (32, 32)
x_train = np.array(
    [img_to_array(load_img(img, target_size=image_size)) for img in x_train],
    dtype='float32')
x_test = np.array(
    [img_to_array(load_img(img, target_size=image_size)) for img in x_test],
    dtype='float32')

# Normalize the pixel values (the arrays are already float32)
x_train /= 255
x_test /= 255
print(x_train.shape)

# Build the convolutional neural network
model = Sequential()
# Input shape (32, 32, 3) corresponds to
# (image_height, image_width, color_channels)
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=(32, 32, 3)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# Convolution and pooling layers output 3-D tensors (height, width, channels);
# width and height shrink in the deeper layers.
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# Flatten the 3-D feature maps to 1-D before the fully connected layers
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# Create the RMSprop optimizer from the same keras package that builds the
# model, so optimizer and model never come from two different Keras
# implementations.
opt = keras.optimizers.RMSprop(learning_rate=0.001)
# Compile the model: RMSprop optimizer and categorical cross-entropy loss
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

batch_size = 64
epochs = 5
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(x_test, y_test),
                    shuffle=True)
model.save('cnn_model.h5')

# Evaluate the trained model
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])  # loss
print('Test accuracy:', scores[1])  # accuracy

plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
# Show the full accuracy range: a lower limit of 0.5 would hide the curves
# whenever accuracy is below 50%.
plt.ylim([0, 1])
plt.legend(loc='lower right')
plt.show()
如果评分不够高,可以通过增加epoch数或增加数据来提高模型准确率。