Context Encoders | PBL踩坑中QAQ

最新推荐文章于 2024-09-12 08:09:32 发布

zxxwill

最新推荐文章于 2024-09-12 08:09:32 发布

阅读量792

点赞数 2

本文链接：https://blog.csdn.net/zxxwill/article/details/119530951

版权

在pycharm中选择anaconda环境

tensorflow和pytorch都要下载gpu版本

Context Encoders

pycharm中新建项目

train_images下放arch文件夹存放训练图片

修复的图片自动保存在images文件夹中

头文件

import glob
import cv2
import os
import matplotlib.pyplot as plt
#import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Conv2D, LeakyReLU, BatchNormalization, UpSampling2D
from tensorflow.keras.layers import Activation, Input, Flatten, Dense
from tensorflow.keras.optimizers import Adam
import numpy as np

import tensorflow as tf

会有warning，查了下好像是因为 tensorflow2.0 的原因，所以改为

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

#使用256*256图片，与原论文有出入

可以找找别的images数据集

class ContextEncoder():
    def __init__(self):
        self.img_rows = 256
        self.img_cols = 256

        self.mask_height = 64
        self.mask_width = 64

        self.channels = 3

        self.sum_classes = 1

        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        self.missing_shape = (self.mask_height, self.mask_width, self.channels)

        optimizer = Adam(0.0002, 0.5)

        # 生成器判别器
        self.generator = self.build_generator()

        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        # 准备联合训练
        self.discriminator.trainable = False
        masked_img = Input(shape=self.img_shape)
        gen_missing = self.generator(masked_img)
        valid = self.discriminator(gen_missing)
        self.combined = Model(masked_img, [gen_missing, valid])
        self.combined.compile(loss=['mse', 'binary_crossentropy'],
                              loss_weights=[0.999, 0.001],
                              optimizer=optimizer)


    def build_generator(self):

        model = Sequential()

        # 先定义编码器

        # 输入256*256*3的遮挡图
        model.add(Conv2D(64, kernel_size=4, strides=2, input_shape=self.img_shape, 
        padding="same"))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        # ——>128*12*64 特征图
        model.add(Conv2D(64, kernel_size=4, strides=2,  padding="same"))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))

        model.add(Conv2D(128, kernel_size=4, strides=2,  padding="same"))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))

        model.add(Conv2D(256, kernel_size=4, strides=2, padding="same"))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))

        model.add(Conv2D(512, kernel_size=4, strides=2, padding="same"))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        # 8*8*512

#中间层

如果直接flatten 全连接层（据说）会参数爆炸

所以用channel wise 全连接层实现

可是tensorflow基于静态图的模型搭建方式，需要自己编写函数

        # Decoder ——>上采样＋卷积
        model.add(UpSampling2D())
        model.add(Conv2D(256, kernel_size=4, padding='same'))
        model.add(Activation('relu'))
        model.add(BatchNormalization(momentum=0.8))
        # 第一层upconv完成 ——> 16*16*256

        model.add(UpSampling2D())
        model.add(Conv2D(128, kernel_size=4, padding='same'))
        model.add(Activation('relu'))
        model.add(BatchNormalization(momentum=0.8))
        # 32*32*128

        model.add(UpSampling2D())
        model.add(Conv2D(64, kernel_size=4, padding='same'))
        model.add(Activation('relu'))
        model.add(BatchNormalization(momentum=0.8))
        # 64*64*64

        model.add(Conv2D(self.channels, kernel_size=2, padding='same'))
        model.add(Activation('tanh'))

        # 64*64*3 输出完成

        model.summary()

        masked_img = Input(shape=self.img_shape)
        gen_missing = model(masked_img)

        return Model(masked_img, gen_missing)

    def build_discriminator(self):

        model = Sequential()

        # 64*64*3
        model.add(Conv2D(64, kernel_size=4, strides=2, input_shape=self.missing_shape, 
        padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        # 32*32*64

        model.add(Conv2D(128, kernel_size=4, strides=2, padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        # 16*16*128

        model.add(Conv2D(256, kernel_size=4, strides=2, padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        # 8*8*256

        model.add(Conv2D(512, kernel_size=4, strides=2, padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        # 4*4*512

        model.add(Flatten())
        # 16384

        model.add(Dense(1, activation='sigmoid'))

        model.summary()

        img = Input(self.missing_shape)
        validity = model(img)
        # 完成了输入64*64*3 图片 输出真是概率

        return Model(img, validity)

    def train(self, epochs, batch_size=128, sample_interval=50):

        if os.path.exists('saved_model/discriminator_weights.hdf5') and os.path.exists(
                'saved_model/generator_weights.hdf5'):
            self.discriminator.load_weights('saved_model/discriminator_weights.hdf5')
            self.generator.load_weights('saved_model/generator_weights.hdf5')
            print('-------------load the model-----------------')

        X_train = []

        list = glob.glob(r'train_images/arch/*.jpg')
        for l in list:
            im = cv2.imread(l)
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            X_train.append(im)
        X_train = np.array(X_train)

        print('X_train.shape', X_train.shape, "—————————————————数据集加载完成——————————")

        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # 训练判别器
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs = X_train[idx]
            imgs = imgs / 175.5 - 1.  # -1 - 1
            # 随机抽取batchsize个真实图像

            masked_imgs, missing_parts, _ = self.mask_randomly(imgs)
            # masked_imgs就代表了遮挡的batch个图像
            # missing_parts就代表了丢失的batch个图像块

            gen_missing = self.generator.predict(masked_imgs)
            # 通过真假两个图训练判别器
            d_loss_real = self.discriminator.train_on_batch(missing_parts, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_missing, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)  # 返回损失值和准确率

            # 训练生成器
            g_loss = self.combined.train_on_batch(masked_imgs, [missing_parts, valid])
            # 在这里 使用的是 MSE ADloss

            # 打印损失值以及准确率
            print("%d [D loss: %f, acc: %.2f%%] [G loss mse: %f, ad loss: %f]" % (
                epoch, d_loss[0], 100 * d_loss[1], g_loss[0], g_loss[1]))
            # d_loss[0]判别器损失, d_loss[1]准确率, g_loss[0]联合模型的重建损失, g_loss[1]联 
            合模型的对抗损失
            if epoch % sample_interval == 0:
                # 随机生成5个整数
                idx = np.random.randint(0, X_train.shape[0], 5)
                imgs = X_train[idx]
                imgs = imgs / 127.5 - 1.
                self.sample_images(epoch, imgs)
            if epoch % 1000 == 0:
                self.save_model()

    def save_model(self):

        def save(model, model_name):
            model_path = "saved_model/%s.json" % model_name
            weights_path = "saved_model/%s_weights.hdf5" % model_name
            options = {"file_arch": model_path,
                       "file_weight": weights_path}
            json_string = model.to_json()
            open(options['file_arch'], 'w').write(json_string)
            model.save_weights(options['file_weight'])

        save(self.generator, "generator")
        save(self.discriminator, "discriminator")

    def sample_images(self, epoch, imgs):
        r, c = 3, 5
        masked_imgs, missing_parts, (y1, y2, x1, x2) = self.mask_randomly(imgs)
        gen_missing = self.generator.predict(masked_imgs)

        imgs = 0.5 * imgs + 0.5
        masked_imgs = 0.5 * masked_imgs + 0.5
        gen_missing = 0.5 * gen_missing + 0.5

        fig, axs = plt.subplots(r, c)
        # imshow 绘制原图 遮挡图 和修复图
        for i in range(c):
            axs[0, i].imshow(imgs[i, :, :])
            axs[0, i].axis('off')
            axs[1, i].imshow(masked_imgs[i, :, :])
            axs[1, i].axis('off')
            filled_in = imgs[i].copy()
            filled_in[y1[i]:y2[i], x1[i]:x2[i], :] = gen_missing[i]
            axs[2, i].imshow(filled_in)
            axs[2, i].axis('off')
        fig.savefig("images/%d.png" % epoch, dpi=256)
        plt.close()

    def mask_randomly(self, imgs):
        y1 = np.random.randint(0, self.img_rows-self.mask_height, imgs.shape[0])
        y2 = y1 + self.mask_height
        x1 = np.random.randint(0, self.img_rows - self.mask_width, imgs.shape[0])
        x2 = x1 + self.mask_width

        # 复制原图 待遮挡（这里只需要完成像素置0即可完成遮挡）
        masked_imgs = np.empty_like(imgs)

        # 丢失区域内容大小尺寸定义完毕 （这里只需要将丢失的像素点复制进来）
        missing_parts = np.empty((imgs.shape[0], self.mask_height, self.mask_width, 
        self.channels))

        for i, img in enumerate(imgs):
            masked_img = img.copy()  # 首先复制原图 也就是准备完成遮挡单个图
            _y1, _y2, _x1, _x2 = y1[i], y2[i], x1[i], x2[i]  # 随机生成的每个遮挡坐标
            missing_parts[i] = masked_img[_y1:_y2, _x1:_x2, :].copy()
            masked_img[_y1:_y2, _x1:_x2, :] = 0  # 置0操作 完成遮挡
            masked_imgs[i] = masked_img  # 存入 masked_imgs
        return masked_imgs, missing_parts, (y1, y2, x1, x2)


if __name__ == '__main__':
    context_encoder = ContextEncoder()
    context_encoder.train(epochs=30000, batch_size=16, sample_interval=50)

运行结果

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 128, 128, 64)      3136      
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 128, 128, 64)      0         
_________________________________________________________________
batch_normalization (BatchNo (None, 128, 128, 64)      256       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 64, 64, 64)        65600     
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 64, 64, 64)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 64, 64, 64)        256       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 128)       131200    
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 32, 32, 128)       0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 32, 32, 128)       512       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 16, 16, 256)       524544    
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU)    (None, 16, 16, 256)       0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 16, 16, 256)       1024      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 8, 8, 512)         2097664   
_________________________________________________________________
leaky_re_lu_4 (LeakyReLU)    (None, 8, 8, 512)         0         
_________________________________________________________________
batch_normalization_4 (Batch (None, 8, 8, 512)         2048      
_________________________________________________________________
up_sampling2d (UpSampling2D) (None, 16, 16, 512)       0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 16, 16, 256)       2097408   
_________________________________________________________________
activation (Activation)      (None, 16, 16, 256)       0         
_________________________________________________________________
batch_normalization_5 (Batch (None, 16, 16, 256)       1024      
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 32, 32, 256)       0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 32, 32, 128)       524416    
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 128)       0         
_________________________________________________________________
batch_normalization_6 (Batch (None, 32, 32, 128)       512       
_________________________________________________________________
up_sampling2d_2 (UpSampling2 (None, 64, 64, 128)       0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 64, 64, 64)        131136    
_________________________________________________________________
activation_2 (Activation)    (None, 64, 64, 64)        0         
_________________________________________________________________
batch_normalization_7 (Batch (None, 64, 64, 64)        256       
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 64, 64, 3)         771       
_________________________________________________________________
activation_3 (Activation)    (None, 64, 64, 3)         0         
=================================================================
Total params: 5,581,763
Trainable params: 5,578,819
Non-trainable params: 2,944
_________________________________________________________________
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_9 (Conv2D)            (None, 32, 32, 64)        3136      
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU)    (None, 32, 32, 64)        0         
_________________________________________________________________
batch_normalization_8 (Batch (None, 32, 32, 64)        256       
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 16, 16, 128)       131200    
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 16, 16, 128)       0         
_________________________________________________________________
batch_normalization_9 (Batch (None, 16, 16, 128)       512       
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 8, 8, 256)         524544    
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU)    (None, 8, 8, 256)         0         
_________________________________________________________________
batch_normalization_10 (Batc (None, 8, 8, 256)         1024      
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 4, 4, 512)         2097664   
_________________________________________________________________
leaky_re_lu_8 (LeakyReLU)    (None, 4, 4, 512)         0         
_________________________________________________________________
batch_normalization_11 (Batc (None, 4, 4, 512)         2048      
_________________________________________________________________
flatten (Flatten)            (None, 8192)              0         
_________________________________________________________________
dense (Dense)                (None, 1)                 8193      
=================================================================
Total params: 2,768,577
Trainable params: 2,766,657
Non-trainable params: 1,920
_________________________________________________________________
X_train.shape (5000, 256, 256, 3) —————————数据集加载完成—————————

开始训练