介绍
数据集介绍
这里通过keras.src.datasets.cifar10包的介绍来了解cifar10数据集:这个数据集包含50000张32*32的彩色训练图像和10000张测试图像,共标注为10个类别。
电脑环境介绍
PyCharm
python 3.10.9
TensorFlow2.x
CPU
模型训练
1.导入依赖
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from keras.layers import *
from keras.models import *
from keras.datasets import cifar10
import ssl
2.数据准备
使用官方给定的方法:
(x_train,y_train),(x_test,y_test) = cifar10.load_data()  # load the CIFAR-10 dataset, unpacked into training and test sets
在使用时我遇到了如下问题,报错:ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED]。这是下载数据集时遇到的证书验证问题,这里使用的解决方法是全局取消证书验证(该做法会关闭HTTPS证书校验,仅建议在可信网络环境下临时使用):
import ssl
# Swap the default HTTPS context factory for the unverified one. This turns off
# certificate verification globally, so treat it as an insecure workaround.
ssl._create_default_https_context = ssl._create_unverified_context
整体数据准备阶段的代码:
# 2. Data preparation
# Work around ssl.SSLCertVerificationError raised while downloading the dataset
# by temporarily switching to an unverified HTTPS context. That disables
# certificate checks, so the original factory is restored as soon as the
# download attempt finishes (successfully or not).
_verified_https_context = ssl._create_default_https_context
ssl._create_default_https_context = ssl._create_unverified_context
try:
    # Load CIFAR-10, unpacked into training and test sets.
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
finally:
    ssl._create_default_https_context = _verified_https_context

# # Image preview (disabled)
# plt.figure(figsize=(10, 4))  # canvas 10 inches wide, 4 inches high
# for i, imgs in enumerate(x_train[:10]):
#     # Split the canvas into 2 rows x 10 columns and draw subplot i+1.
#     plt.subplot(2, 10, i+1)
#     plt.imshow(imgs, cmap=plt.cm.binary)
#     plt.axis('off')
# for i, imgs in enumerate(x_test[:10]):
#     # Split the canvas into 2 rows x 10 columns and draw subplot i+11.
#     plt.subplot(2, 10, i+11)
#     plt.imshow(imgs, cmap=plt.cm.binary)
#     plt.axis('off')
# plt.show()  # needed when running from PyCharm so the figure is displayed

# Preprocessing
x_train = x_train / 255.0  # scale pixel values from 0-255 down to 0-1
x_test = x_test / 255.0
# One-hot encode the labels. The width is pinned to the 10 CIFAR-10 classes so
# both splits get the same number of columns regardless of which labels occur.
y_train = keras.utils.to_categorical(y_train, num_classes=10)
y_test = keras.utils.to_categorical(y_test, num_classes=10)
使用上述方法下载的数据集被存放在C:\Users\xxx\.keras\datasets地址下,使用时直接被调用。但是如果通过手动下载数据集,可以编写数据处理函数,手动处理数据集得到训练集和测试集。
1.将下载好的cifar-10-batches-py.gz解压到任意路径,这里我放到同路径的DataSet文件夹下: path='DataSet/cifar-10-batches-py'
2.直接上代码(代码是通过查阅cifar10.py源码和cifar.py源码写出的):
CIFAR数据处理
import os
import numpy as np
from keras.src import backend
import _pickle as cPickle
import matplotlib.pyplot as plt
def loadbatch(fpath, label_key="labels"):
    """Read one pickled CIFAR batch file and return its images and labels.

    Args:
        fpath: Path of the batch file to read.
        label_key: Key under which the labels are stored in the batch dict.

    Returns:
        A ``(data, labels)`` tuple. ``data`` is the stored array reshaped to
        ``(num_samples, 3, 32, 32)``; ``labels`` is returned exactly as stored.
    """
    # NOTE: unpickling can execute arbitrary code; only open batch files that
    # come from a trusted source.
    with open(fpath, "rb") as f:
        d = cPickle.load(f, encoding="bytes")
    # encoding="bytes" leaves the dict keys as bytes; decode them to str.
    d = {k.decode("utf8"): v for k, v in d.items()}
    data = d["data"]
    labels = d[label_key]
    # Each row is one flattened image; unfold it to (channels, height, width).
    data = data.reshape(data.shape[0], 3, 32, 32)
    return data, labels
def loaddata(path='DataSet/cifar-10-batches-py'):
    """Load CIFAR-10 from extracted batch files on disk.

    Args:
        path: Directory containing ``data_batch_1`` .. ``data_batch_5`` and
            ``test_batch``. Defaults to the folder used in this article.

    Returns:
        ``(x_train, y_train), (x_test, y_test)``. Labels are reshaped to a
        single column; images are transposed to channels-last when the Keras
        backend reports that image data format.
    """
    num_train_samples = 50000
    samples_per_batch = 10000
    # Image buffer for the training set, shape (50000, 3, 32, 32).
    x_train = np.empty((num_train_samples, 3, 32, 32), dtype="uint8")
    # Label buffer for the training set, shape (50000,).
    y_train = np.empty((num_train_samples,), dtype="uint8")

    # Training set: copy each of the five data_batch files into its slice.
    for i in range(1, 6):
        fpath = os.path.join(path, "data_batch_" + str(i))
        start, stop = (i - 1) * samples_per_batch, i * samples_per_batch
        x_train[start:stop, :, :, :], y_train[start:stop] = loadbatch(fpath)

    # Test set: a single batch file.
    fpath = os.path.join(path, "test_batch")
    x_test, y_test = loadbatch(fpath)

    # Turn the flat label sequences into column vectors of shape (N, 1).
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if backend.image_data_format() == "channels_last":
        # (N, 3, 32, 32) -> (N, 32, 32, 3)
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    # Give the test arrays the same dtypes as the training arrays.
    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)
    return (x_train, y_train), (x_test, y_test)
# Load CIFAR-10 from the local batch files, unpacked into training and test sets.
(x_train, y_train), (x_test, y_test) = loaddata()

# Preview: the first 10 training images on the top row and the first 10 test
# images on the bottom row.
plt.figure(figsize=(10, 4))  # canvas 10 inches wide, 4 inches high
for row, images in enumerate((x_train[:10], x_test[:10])):
    for col, img in enumerate(images):
        # 2 rows x 10 columns: training images use slots 1-10, test images 11-20.
        plt.subplot(2, 10, row * 10 + col + 1)
        plt.imshow(img, cmap=plt.cm.binary)
        plt.axis('off')
plt.show()  # needed when running from PyCharm so the figure is displayed
结果:
3.模型建立
# Build the network as a Sequential model: layers are stacked in list order.
model = Sequential([
    # Positional form; the keyword equivalent is Conv2D(filters=32, kernel_size=3).
    Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D(2, 2),
    Conv2D(filters=64, kernel_size=3, activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(filters=64, kernel_size=3, activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),  # flatten the feature maps ahead of the dense layers
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
])
4.模型编译和训练
# Compile the model: optimizer, loss and the metric reported during training.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

# # Model evaluation (disabled)
# evaluate = model.evaluate(x_test, y_test)
# print(evaluate)

# Train the model; the test split is passed as validation data for each epoch.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=60,
    epochs=10,
    verbose=1,
    validation_data=(x_test, y_test),
    shuffle=True,
)
5.图像绘制
# Per-epoch curves recorded by model.fit.
loss = history.history['loss']
val_loss = history.history['val_loss']
acc = history.history['acc']
val_acc = history.history['val_acc']

# The ten CIFAR-10 class names ('cat' was missing and 'automobile' misspelled).
category = ['airplane', 'automobile', 'bird', 'cat', 'deer',
            'dog', 'frog', 'horse', 'ship', 'truck']
index_x = range(len(category))
# plt.xticks(index_x, category)  # use the class names as x-axis tick labels

# One x position per recorded epoch, so the plot follows the history length
# instead of a hard-coded epoch count.
epochs_range = range(len(acc))

plt.figure(figsize=(18, 4))

# Left panel: accuracy.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()