# These augmentation approaches easily reach over 85% test accuracy.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ["KERAS_BACKEND"] = "tensorflow"
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
import keras
from keras import layers
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
# Training hyperparameters.
learning_rate = 2e-3
weight_decay = 1e-4
batch_size = 128
num_epochs = 10

# CIFAR-10: 50k train / 10k test images, 32x32 RGB, integer labels 0-9.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
print(x_train.shape, y_train.shape, x_train.max(), x_test.shape)
val_split = 0.1
print(x_train.max(), x_train.min(), np.unique(y_train))

# Preview the first 64 training images, labelled on the x-axis.
plt.figure(figsize=(15, 15), dpi=100)
for idx in range(64):
    plt.subplot(8, 8, idx + 1)
    plt.xticks([])
    plt.yticks([])
    plt.xlabel(y_train[idx])
    plt.imshow(x_train[idx])

# Reserve the leading val_split fraction of the training set for validation.
val_indices = int(len(x_train) * val_split)
new_x_train, new_y_train = x_train[val_indices:], y_train[val_indices:]
x_val, y_val = x_train[:val_indices], y_train[:val_indices]
image_size = 32
auto = tf.data.AUTOTUNE

# Augmentations applied to training batches only (see make_datasets).
augmentation_layers = [
    # keras.layers.Resizing(34, 34),
    keras.layers.RandomCrop(image_size, image_size),  # random crop
    keras.layers.RandomFlip("horizontal"),            # random horizontal flip
    # keras.layers.RandomRotation(0.06),  # random rotation
    # keras.layers.RandomZoom(0.06),      # random zoom
]
def augment_images(images):
    """Pipe a batch of images through every augmentation layer in order.

    Each layer is invoked with training=True so stochastic augmentations
    (crop, flip) are actually applied rather than bypassed.
    """
    augmented = images
    for aug in augmentation_layers:
        augmented = aug(augmented, training=True)
    return augmented
# Build a tf.data pipeline. Training data is shuffled, batched, then
# augmented; validation/test data is only batched. Everything prefetches.
def make_datasets(images, labels, is_train=False):
    """Wrap (images, labels) arrays in a batched, prefetching tf.data.Dataset.

    When is_train is True the pipeline also shuffles (buffer of 20 batches)
    and applies the augmentation stack to each batch in parallel.
    """
    ds = tf.data.Dataset.from_tensor_slices((images, labels))
    if is_train:
        ds = ds.shuffle(batch_size * 20).batch(batch_size)
        ds = ds.map(lambda x, y: (augment_images(x), y), num_parallel_calls=auto)
    else:
        ds = ds.batch(batch_size)
    return ds.prefetch(auto)
train_dataset = make_datasets(new_x_train, new_y_train, is_train=True)
val_dataset = make_datasets(x_val, y_val)
test_dataset = make_datasets(x_test, y_test)

# Sanity-check one augmented training batch and visualize nine samples.
for images, labels in train_dataset.take(1):
    print(images.shape, images.numpy().max(), images.numpy().min())
    batch = np.clip(images.numpy(), 0, 255).astype('uint8')
    plt.figure(figsize=(12, 12))
    for i in range(9):
        plt.subplot(3, 3, i + 1)
        plt.imshow(batch[i])
        plt.axis('off')
plt.show()
# kernel_size=patch_size sets the region each convolution covers, and
# strides=patch_size makes the kernel hop by exactly its own width/height.
# Applied to an input image x, this splits the image into non-overlapping
# patch_size x patch_size patches: the output feature map shrinks by a
# factor of patch_size in both spatial dimensions, and each output
# position corresponds to one distinct patch of the input.
def activation_block(x):
    """Apply GELU then batch normalization.

    GELU, unlike ReLU, passes small negative values through rather than
    zeroing them.
    """
    return layers.BatchNormalization()(layers.Activation("gelu")(x))
# Patchify stem block.
def conv_stem(x, filters: int, patch_size: int):
    """Split x into non-overlapping patches via a strided convolution.

    kernel_size == strides == patch_size, so each output position embeds
    one patch_size x patch_size patch into `filters` channels; the result
    is passed through the GELU + batch-norm activation block.
    """
    patches = layers.Conv2D(filters, kernel_size=patch_size, strides=patch_size)(x)
    return activation_block(patches)
# 深度卷积(Depthwise Convolution)