1. Autoencoders
If the data x itself is used as the supervision signal for learning, the resulting class of algorithms is called self-supervised learning. The autoencoder falls into this category.
How the autoencoder works: the data $\boldsymbol{x}$ itself is used as the supervision signal to guide the training of the network, i.e. we want the network to learn the mapping $f_\theta: \boldsymbol{x} \to \boldsymbol{x}$. The network $f_\theta$ is split into two parts: the front sub-network learns the mapping $g_{\theta_1}: \boldsymbol{x} \to \boldsymbol{z}$, and the rear sub-network learns the mapping $h_{\theta_2}: \boldsymbol{z} \to \boldsymbol{x}$. We view $g_{\theta_1}$ as a data encoding process that compresses the high-dimensional input $\boldsymbol{x}$ into a low-dimensional latent variable $\boldsymbol{z}$ (also called a hidden variable); it is therefore called the Encoder network. Conversely, $h_{\theta_2}$ is a data decoding process that decodes the encoded $\boldsymbol{z}$ back into the high-dimensional $\boldsymbol{x}$; it is called the Decoder network.
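In symbols, the full network is the composition of the two sub-networks, and training searches for parameters that make the reconstruction as close to the input as possible. A minimal formulation (the distance $\mathcal{L}$ is a design choice; the code below uses a per-pixel sigmoid cross entropy, and mean squared error is an equally common alternative):

$$
f_\theta = h_{\theta_2} \circ g_{\theta_1}, \qquad
\min_{\theta_1, \theta_2} \; \mathcal{L}\big(\boldsymbol{x},\, h_{\theta_2}(g_{\theta_1}(\boldsymbol{x}))\big), \qquad
\text{e.g. } \mathcal{L}_{\mathrm{MSE}} = \frac{1}{n}\sum_{i=1}^{n}\big(x_i - \bar{x}_i\big)^2
$$

where $\bar{\boldsymbol{x}} = h_{\theta_2}(g_{\theta_1}(\boldsymbol{x}))$ is the reconstruction and $n$ the input dimension (here $n = 784$).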
2. Fashion MNIST Image Reconstruction
import tensorflow as tf
from tensorflow import keras
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
x_train = tf.convert_to_tensor(x_train/255., tf.float32)
x_test = tf.convert_to_tensor(x_test/255., tf.float32)
batchsz = 100
train_db = tf.data.Dataset.from_tensor_slices(x_train)
test_db = tf.data.Dataset.from_tensor_slices(x_test)
train_db = train_db.shuffle(batchsz*5).batch(batchsz).repeat(10)
test_db = test_db.batch(batchsz)
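As a quick sanity check, a batch drawn from train_db should come out with shape (100, 28, 28), i.e. batchsz grayscale 28×28 images:

sample = next(iter(train_db))
print(sample.shape)  # expected: (100, 28, 28)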
# model = keras.Sequential([
#     keras.layers.Dense(256, activation=tf.nn.relu),  # parameters: 784*256+256
#     keras.layers.Dense(128, activation=tf.nn.relu),  # parameters: 256*128+128
#     keras.layers.Dense(20),
#     keras.layers.Dense(128, activation=tf.nn.relu),  # parameters: 20*128+128
#     keras.layers.Dense(256, activation=tf.nn.relu),  # parameters: 128*256+256
#     keras.layers.Dense(784)
# ])
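The commented-out flat Sequential above would learn the same x → x mapping, but splitting the network into explicit encoder and decoder sub-models, as in the AE class below, makes the latent code $\boldsymbol{z}$ directly accessible (e.g. via model.encoder) after training.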
class AE(keras.Model):
    def __init__(self):
        super(AE, self).__init__()
        # Build the Encoder network
        self.encoder = keras.Sequential([
            keras.layers.Dense(256, activation=tf.nn.relu),  # parameters: 784*256+256
            keras.layers.Dense(128, activation=tf.nn.relu),  # parameters: 256*128+128
            keras.layers.Dense(20)                           # parameters: 128*20+20
        ])
        # Build the Decoder network
        self.decoder = keras.Sequential([
            keras.layers.Dense(128, activation=tf.nn.relu),  # parameters: 20*128+128
            keras.layers.Dense(256, activation=tf.nn.relu),  # parameters: 128*256+256
            keras.layers.Dense(784)                          # parameters: 256*784+784
        ])

    def call(self, inputs, training=None):
        # Forward pass
        # Encode the input into the latent vector h
        h = self.encoder(inputs)
        # Decode h back into the reconstructed image
        out = self.decoder(h)
        return out
model = AE()
model.build(input_shape=(4, 784))  # build with a dummy batch size of 4 so summary() can run
model.summary()
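model.summary() should report roughly 473,636 trainable parameters in total: 236,436 in the encoder (200,960 + 32,896 + 2,580) and 237,200 in the decoder (2,688 + 33,024 + 201,488), matching the per-layer counts in the comments above.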
optimizer = keras.optimizers.Adam(learning_rate=1e-3)
for step, x in enumerate(train_db):
    # Flatten each 28x28 image into a 784-d vector
    x = tf.reshape(x, [-1, 784])
    with tf.GradientTape() as tape:
        # Reconstruct the input; the output is raw logits
        xx = model(x)
        # Per-pixel reconstruction loss, using the input pixels as targets
        loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=x, logits=xx)
        loss = tf.reduce_mean(loss)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    if step % 100 == 0:
        print(step, 'loss: ', float(loss))
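sigmoid_cross_entropy_with_logits treats every pixel value in [0, 1] as an independent Bernoulli target, which is why the model outputs raw logits and the sigmoid is applied only at visualization time. A plain mean-squared reconstruction error also works; as a sketch, the two loss lines inside the tape could be replaced with:

loss = tf.reduce_mean(tf.square(x - tf.sigmoid(xx)))  # MSE between input and sigmoid output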
def save_images(imgs, name):
    new_im = Image.new('L', (280, 280))
    index = 0
    for i in range(0, 280, 28):      # 10 columns (x offset)
        for j in range(0, 280, 28):  # 10 rows (y offset)
            im = imgs[index]
            im = Image.fromarray(im, mode='L')
            new_im.paste(im, (i, j))  # paste at the corresponding position
            index += 1
    # Save the image grid
    new_im.save(name)
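save_images fills the 10×10 grid column by column and therefore expects imgs to hold at least 100 uint8 arrays of shape (28, 28). With the concatenation below, images 0–49 (the test originals) fill the left five columns and images 50–99 (their reconstructions) the right five.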
# Take one batch from the test set and reconstruct it
x = next(iter(test_db))
logits = model(tf.reshape(x, [-1, 784]))
x_hat = tf.sigmoid(logits)                          # map logits to pixel values in [0, 1]
x_hat = tf.reshape(x_hat, [-1, 28, 28])
x_concat = tf.concat([x[:50], x_hat[:50]], axis=0)  # 50 originals + 50 reconstructions
x_concat = x_concat.numpy() * 255.
x_concat = x_concat.astype(np.uint8)
save_images(x_concat, '10.png')
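Since the encoder is exposed as model.encoder, the 20-dimensional latent codes can also be inspected directly; a minimal sketch reusing the test batch x from above:

z = model.encoder(tf.reshape(x, [-1, 784]))
print(z.shape)  # (100, 20): one 20-d latent vector per test image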