在pycharm中选择anaconda环境
tensorflow和pytorch都要下载gpu版本
Context Encoders
pycharm中新建项目
train_images下放arch文件夹存放训练图片
修复的图片自动保存在images文件夹中
头文件
import glob
import cv2
import os
import matplotlib.pyplot as plt
#import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Conv2D, LeakyReLU, BatchNormalization, UpSampling2D
from tensorflow.keras.layers import Activation, Input, Flatten, Dense
from tensorflow.keras.optimizers import Adam
import numpy as np
import tensorflow as tf
会有 warning,查了一下应该是 TensorFlow 2.x 的原因,所以改为下面两行:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
#使用256*256图片,与原论文有出入
可以找找别的images数据集
class ContextEncoder():
    """Context Encoder GAN that inpaints a rectangular hole in RGB images.

    The generator receives a full-size image whose masked region has been
    zeroed and predicts only the missing patch; the discriminator classifies
    patches as real or generated. Images are 256x256 and the hole is 64x64,
    which differs from the sizes used in the original paper.
    """

    def __init__(self):
        """Build and compile the generator, discriminator and combined model."""
        self.img_rows = 256
        self.img_cols = 256
        self.mask_height = 64
        self.mask_width = 64
        self.channels = 3
        self.sum_classes = 1
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        self.missing_shape = (self.mask_height, self.mask_width, self.channels)

        # Positional Adam arguments: learning rate 0.0002, beta_1 0.5.
        optimizer = Adam(0.0002, 0.5)

        # Generator and discriminator.
        self.generator = self.build_generator()
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        # Combined model: the discriminator is frozen here so that training
        # the combined model only updates the generator.
        self.discriminator.trainable = False
        masked_img = Input(shape=self.img_shape)
        gen_missing = self.generator(masked_img)
        valid = self.discriminator(gen_missing)
        self.combined = Model(masked_img, [gen_missing, valid])
        # Reconstruction (MSE) dominates; the adversarial term is weighted 0.001.
        self.combined.compile(loss=['mse', 'binary_crossentropy'],
                              loss_weights=[0.999, 0.001],
                              optimizer=optimizer)

    def build_generator(self):
        """Return the encoder-decoder generator.

        Maps a masked image of shape ``self.img_shape`` to the predicted
        missing patch (64x64x3 with the default sizes), with values in
        [-1, 1] from the final ``tanh``.
        """
        model = Sequential()

        # Encoder: input is the 256*256*3 masked image.
        model.add(Conv2D(64, kernel_size=4, strides=2, input_shape=self.img_shape,
                         padding="same"))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        # -> 128*128*64 feature map
        # Each further stride-2 conv halves the spatial size:
        # 64*64*64 -> 32*32*128 -> 16*16*256 -> 8*8*512
        for filters in (64, 128, 256, 512):
            model.add(Conv2D(filters, kernel_size=4, strides=2, padding="same"))
            model.add(LeakyReLU(alpha=0.2))
            model.add(BatchNormalization(momentum=0.8))
        # 8*8*512

        # Bottleneck.
        # Flattening into a plain fully-connected layer would (reportedly)
        # blow up the parameter count, so the paper uses a channel-wise
        # fully-connected layer instead. That layer would have to be written
        # by hand for TensorFlow's static-graph model building, so it is
        # omitted here and the decoder follows the encoder directly.

        # Decoder: upsampling followed by convolution.
        # 16*16*256 -> 32*32*128 -> 64*64*64
        for filters in (256, 128, 64):
            model.add(UpSampling2D())
            model.add(Conv2D(filters, kernel_size=4, padding='same'))
            model.add(Activation('relu'))
            model.add(BatchNormalization(momentum=0.8))
        # 64*64*64

        model.add(Conv2D(self.channels, kernel_size=2, padding='same'))
        model.add(Activation('tanh'))
        # 64*64*3 output
        model.summary()

        masked_img = Input(shape=self.img_shape)
        gen_missing = model(masked_img)
        return Model(masked_img, gen_missing)

    def build_discriminator(self):
        """Return the discriminator.

        Maps a patch of shape ``self.missing_shape`` to a single sigmoid
        probability that the patch is real.
        """
        model = Sequential()

        # Input: 64*64*3 patch.
        model.add(Conv2D(64, kernel_size=4, strides=2, input_shape=self.missing_shape,
                         padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        # 32*32*64
        # -> 16*16*128 -> 8*8*256 -> 4*4*512
        for filters in (128, 256, 512):
            model.add(Conv2D(filters, kernel_size=4, strides=2, padding='same'))
            model.add(LeakyReLU(alpha=0.2))
            model.add(BatchNormalization(momentum=0.8))
        # 4*4*512

        model.add(Flatten())
        # 4*4*512 = 8192 features
        model.add(Dense(1, activation='sigmoid'))
        model.summary()

        img = Input(self.missing_shape)
        validity = model(img)
        # Input: 64*64*3 patch; output: probability that it is real.
        return Model(img, validity)

    def _load_training_images(self, pattern=r'train_images/arch/*.jpg'):
        """Load every image matching ``pattern`` as an RGB uint8 array.

        Unreadable files are skipped and images whose size differs from
        ``(img_rows, img_cols)`` are resized so the result stacks into one
        4-D array.
        """
        images = []
        for path in glob.glob(pattern):
            im = cv2.imread(path)
            if im is None:
                # cv2.imread signals an unreadable file by returning None.
                continue
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            if im.shape[:2] != (self.img_rows, self.img_cols):
                # cv2.resize takes the target size as (width, height).
                im = cv2.resize(im, (self.img_cols, self.img_rows))
            images.append(im)
        return np.array(images)

    def train(self, epochs, batch_size=128, sample_interval=50):
        """Train the GAN on the images under ``train_images/arch``.

        Args:
            epochs: Number of training iterations; each one processes a
                single randomly drawn batch.
            batch_size: Number of images per iteration.
            sample_interval: Save a preview figure every this many iterations.

        Raises:
            ValueError: If no training image could be loaded.
        """
        # Resume from previously saved weights when both files are present.
        if os.path.exists('saved_model/discriminator_weights.hdf5') and os.path.exists(
                'saved_model/generator_weights.hdf5'):
            self.discriminator.load_weights('saved_model/discriminator_weights.hdf5')
            self.generator.load_weights('saved_model/generator_weights.hdf5')
            print('-------------load the model-----------------')

        X_train = self._load_training_images()
        if X_train.shape[0] == 0:
            raise ValueError("no training images found in train_images/arch/*.jpg")
        print('X_train.shape', X_train.shape, "—————————————————数据集加载完成——————————")

        # Discriminator targets: 1 for real patches, 0 for generated ones.
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):
            # ---- Train the discriminator ----
            # Draw batch_size real images at random.
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs = X_train[idx]
            # Scale uint8 [0, 255] to [-1, 1] to match the generator's tanh output.
            imgs = imgs / 127.5 - 1.

            # masked_imgs: the batch with the hole zeroed out;
            # missing_parts: the true contents of each hole.
            masked_imgs, missing_parts, _ = self.mask_randomly(imgs)
            gen_missing = self.generator.predict(masked_imgs)

            # Train on one real and one generated batch.
            d_loss_real = self.discriminator.train_on_batch(missing_parts, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_missing, fake)
            # Averaged [loss, accuracy].
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---- Train the generator ----
            # Losses are MSE (reconstruction) and binary cross-entropy (adversarial).
            g_loss = self.combined.train_on_batch(masked_imgs, [missing_parts, valid])

            # For a two-output model train_on_batch returns
            # [total weighted loss, mse loss, adversarial loss], so the
            # per-output losses are at indices 1 and 2.
            print("%d [D loss: %f, acc: %.2f%%] [G loss mse: %f, ad loss: %f]" % (
                epoch, d_loss[0], 100 * d_loss[1], g_loss[1], g_loss[2]))

            if epoch % sample_interval == 0:
                # Preview on five randomly chosen images.
                idx = np.random.randint(0, X_train.shape[0], 5)
                imgs = X_train[idx]
                imgs = imgs / 127.5 - 1.
                self.sample_images(epoch, imgs)
            if epoch % 1000 == 0:
                self.save_model()

    def save_model(self):
        """Write architecture JSON and weights for both networks to ``saved_model/``."""
        # Make sure the output directory exists before writing into it.
        os.makedirs("saved_model", exist_ok=True)

        def save(model, model_name):
            model_path = "saved_model/%s.json" % model_name
            weights_path = "saved_model/%s_weights.hdf5" % model_name
            # Use a context manager so the file handle is always closed.
            with open(model_path, 'w') as arch_file:
                arch_file.write(model.to_json())
            model.save_weights(weights_path)

        save(self.generator, "generator")
        save(self.discriminator, "discriminator")

    def sample_images(self, epoch, imgs):
        """Save a 3x5 figure of originals, masked inputs and inpainted results.

        Args:
            epoch: Iteration number, used as the output file name.
            imgs: Batch of at least five images scaled to [-1, 1].
        """
        r, c = 3, 5
        masked_imgs, missing_parts, (y1, y2, x1, x2) = self.mask_randomly(imgs)
        gen_missing = self.generator.predict(masked_imgs)

        # Rescale from [-1, 1] to [0, 1] for display.
        imgs = 0.5 * imgs + 0.5
        masked_imgs = 0.5 * masked_imgs + 0.5
        gen_missing = 0.5 * gen_missing + 0.5

        fig, axs = plt.subplots(r, c)
        # Rows: original, masked input, original with the generated patch pasted in.
        for i in range(c):
            axs[0, i].imshow(imgs[i, :, :])
            axs[0, i].axis('off')
            axs[1, i].imshow(masked_imgs[i, :, :])
            axs[1, i].axis('off')
            filled_in = imgs[i].copy()
            filled_in[y1[i]:y2[i], x1[i]:x2[i], :] = gen_missing[i]
            axs[2, i].imshow(filled_in)
            axs[2, i].axis('off')

        # Make sure the output directory exists before saving.
        os.makedirs("images", exist_ok=True)
        fig.savefig("images/%d.png" % epoch, dpi=256)
        plt.close(fig)

    def mask_randomly(self, imgs):
        """Zero out one random rectangle per image.

        Args:
            imgs: Array of shape (n, img_rows, img_cols, channels).

        Returns:
            A tuple ``(masked_imgs, missing_parts, (y1, y2, x1, x2))`` where
            ``masked_imgs`` is a copy of ``imgs`` with each hole set to 0,
            ``missing_parts`` holds the original contents of each hole, and
            the coordinate arrays give each hole as ``[y1:y2, x1:x2]``.
        """
        count = imgs.shape[0]
        # Vertical offsets are bounded by the image height, horizontal
        # offsets by the image width.
        y1 = np.random.randint(0, self.img_rows - self.mask_height, count)
        y2 = y1 + self.mask_height
        x1 = np.random.randint(0, self.img_cols - self.mask_width, count)
        x2 = x1 + self.mask_width

        # Work on a copy so the caller's batch is left untouched.
        masked_imgs = imgs.copy()
        missing_parts = np.empty((count, self.mask_height, self.mask_width,
                                  self.channels))
        for i, (top, bottom, left, right) in enumerate(zip(y1, y2, x1, x2)):
            # Keep the true patch, then zero the hole in the masked copy.
            missing_parts[i] = imgs[i, top:bottom, left:right, :]
            masked_imgs[i, top:bottom, left:right, :] = 0
        return masked_imgs, missing_parts, (y1, y2, x1, x2)
if __name__ == '__main__':
    # Script entry point: build the models and run 30000 single-batch
    # training iterations, saving a preview figure every 50 iterations.
    context_encoder = ContextEncoder()
    context_encoder.train(epochs=30000, batch_size=16, sample_interval=50)
运行结果
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 128, 128, 64) 3136
_________________________________________________________________
leaky_re_lu (LeakyReLU) (None, 128, 128, 64) 0
_________________________________________________________________
batch_normalization (BatchNo (None, 128, 128, 64) 256
_________________________________________________________________
conv2d_1 (Conv2D) (None, 64, 64, 64) 65600
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU) (None, 64, 64, 64) 0
_________________________________________________________________
batch_normalization_1 (Batch (None, 64, 64, 64) 256
_________________________________________________________________
conv2d_2 (Conv2D) (None, 32, 32, 128) 131200
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU) (None, 32, 32, 128) 0
_________________________________________________________________
batch_normalization_2 (Batch (None, 32, 32, 128) 512
_________________________________________________________________
conv2d_3 (Conv2D) (None, 16, 16, 256) 524544
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU) (None, 16, 16, 256) 0
_________________________________________________________________
batch_normalization_3 (Batch (None, 16, 16, 256) 1024
_________________________________________________________________
conv2d_4 (Conv2D) (None, 8, 8, 512) 2097664
_________________________________________________________________
leaky_re_lu_4 (LeakyReLU) (None, 8, 8, 512) 0
_________________________________________________________________
batch_normalization_4 (Batch (None, 8, 8, 512) 2048
_________________________________________________________________
up_sampling2d (UpSampling2D) (None, 16, 16, 512) 0
_________________________________________________________________
conv2d_5 (Conv2D) (None, 16, 16, 256) 2097408
_________________________________________________________________
activation (Activation) (None, 16, 16, 256) 0
_________________________________________________________________
batch_normalization_5 (Batch (None, 16, 16, 256) 1024
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 32, 32, 256) 0
_________________________________________________________________
conv2d_6 (Conv2D) (None, 32, 32, 128) 524416
_________________________________________________________________
activation_1 (Activation) (None, 32, 32, 128) 0
_________________________________________________________________
batch_normalization_6 (Batch (None, 32, 32, 128) 512
_________________________________________________________________
up_sampling2d_2 (UpSampling2 (None, 64, 64, 128) 0
_________________________________________________________________
conv2d_7 (Conv2D) (None, 64, 64, 64) 131136
_________________________________________________________________
activation_2 (Activation) (None, 64, 64, 64) 0
_________________________________________________________________
batch_normalization_7 (Batch (None, 64, 64, 64) 256
_________________________________________________________________
conv2d_8 (Conv2D) (None, 64, 64, 3) 771
_________________________________________________________________
activation_3 (Activation) (None, 64, 64, 3) 0
=================================================================
Total params: 5,581,763
Trainable params: 5,578,819
Non-trainable params: 2,944
_________________________________________________________________
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_9 (Conv2D) (None, 32, 32, 64) 3136
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU) (None, 32, 32, 64) 0
_________________________________________________________________
batch_normalization_8 (Batch (None, 32, 32, 64) 256
_________________________________________________________________
conv2d_10 (Conv2D) (None, 16, 16, 128) 131200
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU) (None, 16, 16, 128) 0
_________________________________________________________________
batch_normalization_9 (Batch (None, 16, 16, 128) 512
_________________________________________________________________
conv2d_11 (Conv2D) (None, 8, 8, 256) 524544
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU) (None, 8, 8, 256) 0
_________________________________________________________________
batch_normalization_10 (Batc (None, 8, 8, 256) 1024
_________________________________________________________________
conv2d_12 (Conv2D) (None, 4, 4, 512) 2097664
_________________________________________________________________
leaky_re_lu_8 (LeakyReLU) (None, 4, 4, 512) 0
_________________________________________________________________
batch_normalization_11 (Batc (None, 4, 4, 512) 2048
_________________________________________________________________
flatten (Flatten) (None, 8192) 0
_________________________________________________________________
dense (Dense) (None, 1) 8193
=================================================================
Total params: 2,768,577
Trainable params: 2,766,657
Non-trainable params: 1,920
_________________________________________________________________
X_train.shape (5000, 256, 256, 3) —————————数据集加载完成—————————
开始训练
训练到后面其实没什么变化,loss最低0.07
修复图片
刚开始
训练到后面比较好的几张,有模糊的明暗分界线
其他
模型:
1.调参,损失函数和误差分析
2.加入AlexNet,实现不规则区域修复
3.加入自我创新点——算法(不太可能),应用创新(换个马甲?还没想出来)
论文:
1.之后和现在粗糙模型对比🐕
Reference
[1] https://www.bilibili.com/video/BV1cU4y1L7BL