参考书目:陈允杰.TensorFlow与Keras——Python深度学习应用实战.北京:中国水利水电出版社,2021
本系列基本不讲数学原理,只从代码角度去让读者们利用最简洁的Python代码实现深度学习方法。
自编码器是一种实现编码和解码的神经网络,是一种数据压缩的算法,类似于主成分分析,是一种降维的特征提取。其特点为:
1.自编码器只适用于特定的数据,即只能有效压缩与训练集相似的数据。
2.在编码和解码的过程中,数据会有一定的损失。
3.自编码器是从数据自身学习,它不需要标签数据,是一种非监督式的学习。
下面分别使用MLP和CNN作为编码解码器,处理手写数字数据集,并降噪
MLP创建自编码器
导入数据和包,使用手写数字集MNIST:
import numpy as np
import pandas as pd
from keras.datasets import mnist
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Input, Dense

# Fix the random seed so runs are reproducible
np.random.seed(10)

# Load the MNIST digits; labels are discarded (autoencoders are unsupervised)
(X_train, _), (X_test, _) = mnist.load_data()

# Flatten each 28x28 image into a 784-dim float vector for the MLP
X_train = X_train.reshape(len(X_train), 784).astype("float32")
X_test = X_test.reshape(len(X_test), 784).astype("float32")

# Pixel values have a fixed 0-255 range, so rescale them to 0-1
X_train /= 255
X_test /= 255

X_train.shape
因为MLP只接受向量输入,需要把每张2维(28×28)图片拉平成784维的向量,训练集共60000个样本。下面开始定义编码器。
# Encoder half of the autoencoder: 784 -> 128 -> 64
input_img = Input(shape=(784,))
hidden = Dense(128, activation="relu")(input_img)
encoded = Dense(64, activation="relu")(hidden)
这是编码阶段:输入图片input_img经过一个隐藏层得到x,再经过一个隐藏层得到编码输出encoded。下面是解码器。
# Decoder half: 64 -> 128 -> 784; sigmoid keeps outputs in the 0-1 pixel range
hidden = Dense(128, activation="relu")(encoded)
decoded = Dense(784, activation="sigmoid")(hidden)
使用Model进行合并为自编码器
# Full autoencoder: maps an input image to its reconstruction
autoencoder = Model(input_img, decoded)
autoencoder.summary()  # print the model summary
编码器为:
# Encoder model: shares the trained layers, outputs the 64-dim code
encoder = Model(input_img, encoded)
encoder.summary()  # print the model summary
定义解码器
# Decoder model: feed a standalone 64-dim code through the autoencoder's
# last two (already trained) Dense layers
decoder_input = Input(shape=(64,))
decoder_output = decoder_input
for layer in autoencoder.layers[-2:]:
    decoder_output = layer(decoder_output)
decoder = Model(decoder_input, decoder_output)
decoder.summary()  # print the model summary
# Compile the model.
# Fix: the original imported `tensorflow.keras.optimizers` while the model was
# built from the standalone `keras` package; mixing the two namespaces can raise
# "Could not interpret optimizer" / class-mismatch errors on some installs.
# The "adam" string alias (same default hyperparameters) matches the CNN
# section below and stays within a single Keras namespace.
# NOTE(review): "accuracy" is not very meaningful for pixel reconstruction,
# but it is kept for parity with the rest of the tutorial.
autoencoder.compile(loss="binary_crossentropy", optimizer="adam",
                    metrics=["accuracy"])
开始训练
# Train: input and target are both the images themselves (self-supervised)
autoencoder.fit(X_train, X_train,
                validation_data=(X_test, X_test),
                epochs=10, batch_size=256,
                shuffle=True, verbose=1)
计算压缩图片,也就是编码器的输出,然后计算解码器的输出
# Compress the test images into 64-dim codes, then reconstruct them
encoded_imgs = encoder.predict(X_test)
decoded_imgs = decoder.predict(encoded_imgs)
然后画图展示一下
# Show original, compressed and reconstructed images side by side
n = 10  # how many digits to display
plt.figure(figsize=(20, 6))
rows = [
    (X_test, (28, 28)),       # row 1: originals
    (encoded_imgs, (8, 8)),   # row 2: 64-dim codes rendered as 8x8 images
    (decoded_imgs, (28, 28)), # row 3: reconstructions
]
for i in range(n):
    for r, (imgs, shape) in enumerate(rows):
        ax = plt.subplot(3, n, i + 1 + r * n)
        ax.imshow(imgs[i].reshape(shape), cmap="gray")
        ax.axis("off")
plt.show()
中间一行很模糊的就是压缩后的图片,可以看到解码还原出来的图片有一定的损失。
CNN创建自编码器
前面过程一样
import numpy as np
from keras.datasets import mnist
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D

# Fix the random seed so runs are reproducible
seed = 7
np.random.seed(seed)

# Load the MNIST dataset (labels are not needed)
(X_train, _), (X_test, _) = mnist.load_data()

# Reshape into 4-D tensors (samples, height, width, channels) for the CNN
X_train = X_train.reshape(len(X_train), 28, 28, 1).astype("float32")
X_test = X_test.reshape(len(X_test), 28, 28, 1).astype("float32")

# Pixel values have a fixed 0-255 range, so rescale them to 0-1
X_train /= 255
X_test /= 255
定义的层不一样,CNN除了卷积层还有池化层,层数更为复杂
# Convolutional autoencoder: conv/pool encoder down to a 4x4x8 code,
# then a conv/upsample decoder back to 28x28x1.
input_img = Input(shape=(28, 28, 1))

# Encoder: 28x28x16 -> 14x14x16 -> 14x14x8 -> 7x7x8 -> 7x7x8 -> 4x4x8
t = Conv2D(16, (3, 3), activation="relu", padding="same")(input_img)
t = MaxPooling2D((2, 2), padding="same")(t)
t = Conv2D(8, (3, 3), activation="relu", padding="same")(t)
t = MaxPooling2D((2, 2), padding="same")(t)
t = Conv2D(8, (3, 3), activation="relu", padding="same")(t)
encoded = MaxPooling2D((2, 2), padding="same")(t)

# Decoder: 4x4 -> 8x8 -> 16x16 -> 14x14 (valid conv) -> 28x28
t = Conv2D(8, (3, 3), activation="relu", padding="same")(encoded)
t = UpSampling2D((2, 2))(t)
t = Conv2D(8, (3, 3), activation="relu", padding="same")(t)
t = UpSampling2D((2, 2))(t)
# deliberately no padding here: 16x16 shrinks to 14x14 so the
# final upsampling restores the 28x28 input size
t = Conv2D(16, (3, 3), activation="relu")(t)
t = UpSampling2D((2, 2))(t)
decoded = Conv2D(1, (3, 3), activation="sigmoid", padding="same")(t)

autoencoder = Model(input_img, decoded)
autoencoder.summary()  # print the model summary
编码器
# Encoder model: outputs the 4x4x8 compressed representation
encoder = Model(input_img, encoded)
encoder.summary()  # print the model summary
解码器
# Decoder model: feed a standalone 4x4x8 code through the trained decoding
# layers. The seven copy-pasted autoencoder.layers[-7] ... layers[-1] calls
# are replaced by a loop over the last seven layers — identical behavior
# (same layers, same order), without the repetition.
decoder_input = Input(shape=(4, 4, 8))
decoder_layer = decoder_input
for layer in autoencoder.layers[-7:]:
    decoder_layer = layer(decoder_layer)
decoder = Model(decoder_input, decoder_layer)
decoder.summary()  # print the model summary
编译模型和训练
# Compile, then train the network to reconstruct its own input
autoencoder.compile(loss="binary_crossentropy", optimizer="adam",
                    metrics=["accuracy"])
autoencoder.fit(X_train, X_train,
                validation_data=(X_test, X_test),
                epochs=10, batch_size=128,
                shuffle=True, verbose=2)
计算压缩图片和解码图片,画图展示
# Compress the test images into 4x4x8 codes, then decode them back to images
encoded_imgs = encoder.predict(X_test)
decoded_imgs = decoder.predict(encoded_imgs)
# Show original, compressed and reconstructed images side by side
import matplotlib.pyplot as plt

n = 10  # how many digits to display
plt.figure(figsize=(20, 8))
rows = [
    (X_test, lambda im: im.reshape(28, 28)),            # row 1: originals
    (encoded_imgs, lambda im: im.reshape(4, 4 * 8).T),  # row 2: 4x4x8 code as a strip
    (decoded_imgs, lambda im: im.reshape(28, 28)),      # row 3: reconstructions
]
for i in range(n):
    for r, (imgs, to_2d) in enumerate(rows):
        ax = plt.subplot(3, n, i + 1 + r * n)
        ax.imshow(to_2d(imgs[i]), cmap="gray")
        ax.axis("off")
plt.show()
CNN自编码器去噪
前面的过程和模型都与上一节相同,可以直接复用,只是中间多了一段给图片添加噪声的代码。
import numpy as np
from keras.datasets import mnist
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D

# Fix the random seed so runs are reproducible
seed = 7
np.random.seed(seed)

# Load MNIST and shape it into 4-D tensors (samples, 28, 28, 1)
(X_train, _), (X_test, _) = mnist.load_data()
X_train = X_train.reshape(len(X_train), 28, 28, 1).astype("float32")
X_test = X_test.reshape(len(X_test), 28, 28, 1).astype("float32")

# Fixed 0-255 pixel range -> rescale to 0-1
X_train /= 255
X_test /= 255


def _corrupt(imgs, noise_factor):
    """Add Gaussian noise to the images, clipped back into the 0-1 range."""
    noisy = imgs + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=imgs.shape)
    return np.clip(noisy, 0., 1.)


# Build noisy copies (train noise drawn first, then test noise, as before);
# the clean images remain the training targets
nf = 0.5
X_train_noisy = _corrupt(X_train, nf)
X_test_noisy = _corrupt(X_test, nf)
# Same convolutional autoencoder as before: encoder down to a 4x4x8 code,
# conv/upsample decoder back to 28x28x1.
input_img = Input(shape=(28, 28, 1))

# Encoder: 28x28x16 -> 14x14x16 -> 14x14x8 -> 7x7x8 -> 7x7x8 -> 4x4x8
t = Conv2D(16, (3, 3), activation="relu", padding="same")(input_img)
t = MaxPooling2D((2, 2), padding="same")(t)
t = Conv2D(8, (3, 3), activation="relu", padding="same")(t)
t = MaxPooling2D((2, 2), padding="same")(t)
t = Conv2D(8, (3, 3), activation="relu", padding="same")(t)
encoded = MaxPooling2D((2, 2), padding="same")(t)

# Decoder: 4x4 -> 8x8 -> 16x16 -> 14x14 (valid conv) -> 28x28
t = Conv2D(8, (3, 3), activation="relu", padding="same")(encoded)
t = UpSampling2D((2, 2))(t)
t = Conv2D(8, (3, 3), activation="relu", padding="same")(t)
t = UpSampling2D((2, 2))(t)
# deliberately no padding here: 16x16 shrinks to 14x14 so the
# final upsampling restores the 28x28 input size
t = Conv2D(16, (3, 3), activation="relu")(t)
t = UpSampling2D((2, 2))(t)
decoded = Conv2D(1, (3, 3), activation="sigmoid", padding="same")(t)

autoencoder = Model(input_img, decoded)
autoencoder.summary()  # print the model summary
# Encoder model: outputs the 4x4x8 compressed representation
encoder = Model(input_img, encoded)
encoder.summary()  # print the model summary
# Decoder model: feed a standalone 4x4x8 code through the trained decoding
# layers. The seven copy-pasted autoencoder.layers[-7] ... layers[-1] calls
# are replaced by a loop over the last seven layers — identical behavior
# (same layers, same order), without the repetition.
decoder_input = Input(shape=(4, 4, 8))
decoder_layer = decoder_input
for layer in autoencoder.layers[-7:]:
    decoder_layer = layer(decoder_layer)
decoder = Model(decoder_input, decoder_layer)
decoder.summary()  # print the model summary
# Compile, then train the network to map noisy inputs to their clean originals
autoencoder.compile(loss="binary_crossentropy", optimizer="adam",
                    metrics=["accuracy"])
autoencoder.fit(X_train_noisy, X_train,
                validation_data=(X_test_noisy, X_test),
                epochs=10, batch_size=128,
                shuffle=True, verbose=2)
# Compress the noisy test images, then decode into denoised reconstructions
encoded_imgs = encoder.predict(X_test_noisy)
decoded_imgs = decoder.predict(encoded_imgs)
# Show noisy input, compressed code and denoised reconstruction side by side
import matplotlib.pyplot as plt

n = 10  # how many digits to display
plt.figure(figsize=(20, 8))
rows = [
    (X_test_noisy, lambda im: im.reshape(28, 28)),      # row 1: noisy inputs
    (encoded_imgs, lambda im: im.reshape(4, 4 * 8).T),  # row 2: 4x4x8 code as a strip
    (decoded_imgs, lambda im: im.reshape(28, 28)),      # row 3: denoised outputs
]
for i in range(n):
    for r, (imgs, to_2d) in enumerate(rows):
        ax = plt.subplot(3, n, i + 1 + r * n)
        ax.imshow(to_2d(imgs[i]), cmap="gray")
        ax.axis("off")
plt.show()
去噪效果还不错