1.数据加载及预处理
# Data preprocessing: convert each (image, label) pair to the dtypes used for training.
def process(x, y):
    """Cast an image/label pair to the dtypes the training loop works with.

    Args:
        x: Image tensor. It is cast to float32 and divided by 255, which maps
            8-bit pixel values into the range [0, 1].
        y: Label tensor. It is cast to int64.

    Returns:
        A tuple ``(x, y)`` holding the converted image and label.
    """
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int64)
    return x, y
# Load CIFAR-100 and build the input pipelines (shuffle / map / batch).
(x, y), (x_test, y_test) = datasets.cifar100.load_data()
# squeeze(axis=1) removes the size-1 second axis so labels become a flat vector.
y = tf.squeeze(y, axis=1)
y_test = tf.squeeze(y_test, axis=1)
# Training pipeline: shuffle with a 1000-element buffer, preprocess, batch by 64.
db_train = tf.data.Dataset.from_tensor_slices((x, y))
db_train = db_train.shuffle(1000).map(process).batch(64)
# Test pipeline: must be built from the held-out split (x_test, y_test);
# building it from (x, y) would report accuracy on the training data.
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.map(process).batch(64)
数据集约 160 MB,用 PyCharm 直接加载(在线下载)非常慢。建议到官网复制下载链接,用迅雷下载,然后把下载好的压缩包放到 `C:\Users\SaltedFish\.keras\datasets` 目录下,就可以直接使用。
官网下载链接: http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
2.建立卷积网络并训练、预测并得出准确率
def _conv_stage(filters):
    """Return one stage: two 3x3 same-padded ReLU convolutions, then a 2x2 max pool."""
    return [
        # Convolution layers extract local features and represent the input through them.
        layers.Conv2D(filters, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
        layers.Conv2D(filters, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
        # Pooling keeps the dominant features while reducing parameters and
        # computation (dimensionality reduction), which helps against overfitting.
        layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    ]


# Five stages with 64, 128, 256, 512 and 512 filters, flattened into one layer list.
conv_layers = [
    layer
    for filters in (64, 128, 256, 512, 512)
    for layer in _conv_stage(filters)
]
# Build the convolutional network.
conv_net = Sequential(conv_layers)
# Declare the input shape: [batch, 32, 32, 3].
conv_net.build(input_shape=[None, 32, 32, 3])
# Fully connected head: 256 -> 128 -> 100 units; the last layer has no
# activation, so it outputs raw logits.
fc_net = Sequential()
fc_net.add(layers.Dense(256, activation=tf.nn.relu))
fc_net.add(layers.Dense(128, activation=tf.nn.relu))
fc_net.add(layers.Dense(100, activation=None))
# Declare the input shape of the fully connected head: [batch, 512].
fc_net.build(input_shape=[None, 512])
# Set up the optimizer. `learning_rate` is the supported keyword; the old `lr`
# alias is deprecated and rejected by newer Keras releases.
optimizer = optimizers.Adam(learning_rate=1e-4)
# Merge the parameters of both networks into one list so they are updated together.
variables = conv_net.trainable_variables + fc_net.trainable_variables
# Running metrics: classification accuracy and mean loss.
acc_meter = metrics.Accuracy()
loss_meter = metrics.Mean()
# Train for 50 epochs; after each epoch, evaluate on db_test and print accuracy.
for epoch in range(50):
    # ---- Training pass ----
    for step, (x_batch, y_batch) in enumerate(db_train):
        with tf.GradientTape() as tape:
            # [b, 32, 32, 3] ==> [b, 1, 1, 512]
            out = conv_net(x_batch)
            out = tf.reshape(out, [-1, 512])
            # [b, 512] ==> [b, 100]
            logits = fc_net(out)
            y_one_hot = tf.one_hot(y_batch, depth=100)
            per_sample_loss = tf.losses.categorical_crossentropy(
                y_one_hot, logits, from_logits=True)
            # Reduce to a scalar so the gradient is of the batch mean; a
            # non-scalar target would be differentiated as the batch sum.
            loss = tf.reduce_mean(per_sample_loss)
        # Feed the per-sample values to the running mean of the loss.
        loss_meter.update_state(per_sample_loss)
        grads = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(grads, variables))
        if step % 100 == 0:
            print(epoch, step, 'loss:', loss_meter.result().numpy())
            # Start a fresh average for the next 100 steps.
            loss_meter.reset_states()

    # ---- Evaluation pass ----
    # Reset so the printed accuracy covers this epoch only rather than
    # accumulating over all previous epochs.
    acc_meter.reset_states()
    for x_batch, y_batch in db_test:
        # No GradientTape here: evaluation needs no gradients.
        out = conv_net(x_batch)
        out = tf.reshape(out, [-1, 512])
        logits = fc_net(out)
        # Softmax is monotonic, so argmax over the logits picks the same class.
        pred = tf.argmax(logits, axis=1)
        acc_meter.update_state(y_batch, pred)
    print(epoch, "acc result:", acc_meter.result().numpy())