参考链接:
https://blog.csdn.net/dcrmg/article/details/82892909
https://blog.csdn.net/leviopku/article/details/83818997
棋盘效应:卷积网络生成图像时,经常带有类似马赛克一样的小方格,这现象叫做“棋盘效应”。
产生原因:
当我们用神经网络生成图像时,我们经常让它们从低分辨率,高级描述中构建它们,这允许网络描述粗略图像,然后填写细节;
为了做到这一点,我们需要一些方法从较低分辨率的图像到较高分辨率的图像,我们通常使用反卷积操作来完成此操作;粗略地,反卷积层允许模型使用小图像中的每个点来“绘制”较大图像中的正方形。
不幸的是,反卷积很容易产生“不均匀的重叠”,特别是,当内核大小(输出窗口大小)不能被步幅(顶部点之间的间隔)整除时,反卷积具有不均匀的重叠;原则上,神经网络很难完全避免它。
解决办法:
1.使用UpSampling2D+Conv2D 代替 Conv2DTranspose;---1
2.使用subpixel conv 代替 反卷积层(Conv2DTranspose);
3.调节kernel size和stride的大小关系以确保他们尽量能整除;---0
4.在网络末尾加一层stride为1的反卷积,作为调整。---0
5.PixelShuffle
https://blog.csdn.net/qq_34622844/article/details/88936690
https://github.com/KupynOrest/DeblurGAN/issues/41
https://distill.pub/2016/deconv-checkerboard/
代码实现:
train.py
import os
import datetime
import numpy as np
import tensorflow as tf
from utils import load_images
from losses import wasserstein_loss, perceptual_loss
from model import generator_model, discriminator_model, generator_containing_discriminator_multiple_outputs
from keras.optimizers import Adam
# Pin training to GPU 0 and allow TensorFlow to grow GPU memory on demand
# instead of reserving all of it up front.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # allocate GPU memory as needed
# NOTE(review): `config` is never passed to a tf.Session in this file —
# confirm it actually takes effect.
# Persist model weights during training.
def save_all_weights(model_path, d, g, d_on_g, epoch_number, current_loss):
    """Save the generator's weights for the given epoch under *model_path*.

    Only the generator is written out. The discriminator (`d`), combined
    model (`d_on_g`) and `current_loss` are accepted for interface
    compatibility but are currently unused.
    """
    weights_name = 'generator_epo-{}.h5'.format(epoch_number)
    g.save_weights(os.path.join(model_path, weights_name), True)
def train_multiple_outputs(train_path, model_path, n_images, batch_size, epoch_num, critic_updates=5):
    """Train the DeblurGAN: WGAN-style alternation of several critic
    (discriminator) updates per combined generator update.

    :param train_path: directory containing 'blur' and 'sharp' sub-folders
    :param model_path: directory where weights and the loss log are written
    :param n_images: number of image pairs to load (negative = all)
    :param batch_size: mini-batch size
    :param epoch_num: total number of training epochs
    :param critic_updates: discriminator updates per generator update
    """
    # Load the paired dataset: x = blurred inputs, y = sharp targets.
    data = load_images(train_path, n_images)
    x_train, y_train = data['blur'], data['sharp']
    # Build the GAN components.
    g = generator_model()  # generator
    d = discriminator_model()  # discriminator
    d_on_g = generator_containing_discriminator_multiple_outputs(g, d)  # combined network
    # Optimizers.
    d_opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    d_on_g_opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    # `trainable` is captured at compile time, hence the toggling around
    # each compile: d trains standalone, but is frozen inside d_on_g.
    d.trainable = True
    d.compile(optimizer=d_opt, loss=wasserstein_loss)
    d.trainable = False
    loss = [perceptual_loss, wasserstein_loss]
    loss_weights = [100, 1]
    d_on_g.compile(optimizer=d_on_g_opt, loss=loss, loss_weights=loss_weights)
    d.trainable = True
    # WGAN labels: ones for real samples, zeros for generated ones.
    output_true_batch, output_false_batch = np.ones((batch_size, 1)), np.zeros((batch_size, 1))
    for epoch in range(1,epoch_num+1):
        print('epoch: {}/{}'.format(epoch, epoch_num))
        # New random ordering of sample indices each epoch
        # (np.random.permutation leaves the original arrays untouched).
        permutated_indexes = np.random.permutation(x_train.shape[0])
        d_losses = []
        d_on_g_losses = []
        # Iterate by batch: one epoch = x_train.shape[0] / batch_size steps.
        for index in range(int(x_train.shape[0] / batch_size)):
            # Slice one batch of shuffled image pairs.
            batch_indexes = permutated_indexes[index*batch_size:(index+1)*batch_size]
            image_blur_batch = x_train[batch_indexes]
            image_full_batch = y_train[batch_indexes]
            # Generate deblurred images with the current generator.
            generated_images = g.predict(x=image_blur_batch, batch_size=batch_size)
            # Train the critic several times per generator step.
            for _ in range(critic_updates):
                d_loss_real = d.train_on_batch(image_full_batch, output_true_batch)
                d_loss_fake = d.train_on_batch(generated_images, output_false_batch)
                d_loss = 0.5 * np.add(d_loss_fake, d_loss_real)
                d_losses.append(d_loss)
            print('epoch: {}/{}'.format(epoch, epoch_num),'batch {} d_loss : {}'.format(index+1, np.mean(d_losses)))
            # Freeze the critic while training the generator through d_on_g.
            d.trainable = False
            d_on_g_loss = d_on_g.train_on_batch(image_blur_batch, [image_full_batch, output_true_batch])
            d_on_g_losses.append(d_on_g_loss)
            print('epoch: {}/{}'.format(epoch, epoch_num),'batch {} d_on_g_loss : {}'.format(index+1, d_on_g_loss))
            d.trainable = True
        # Append this epoch's mean losses to the training log.
        with open(model_path+'log.txt', 'a+') as f:
            f.write('epoch:{} - d_losses:{} - d_on_g_losses:{}\n'.format(epoch, np.mean(d_losses), np.mean(d_on_g_losses))+'\n')
        # Checkpoint generator weights every 20 epochs and at the end.
        if epoch % 20 == 0 or epoch == (epoch_num):
            save_all_weights(model_path, d, g, d_on_g, epoch, int(np.mean(d_on_g_losses)))
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='')
    # Train data path, model path, dataset size, batch size, epochs,
    # critic updates per generator step.
    parser.add_argument('--train_path', type=str, default='data/face/train/')
    parser.add_argument('--model_path', type=str, default='save_model2/test1/')
    # Bug fix: numeric arguments need type=int — without it a command-line
    # override (e.g. --batch_size 16) arrives as a string and breaks
    # np.ones((batch_size, 1)) and the range() calls in training.
    parser.add_argument('--n_images', type=int, default=3000)
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--epoch_num', type=int, default=300)
    parser.add_argument('--critic_updates', type=int, default=5)
    args = parser.parse_args()
    train_multiple_outputs(args.train_path, args.model_path, args.n_images, args.batch_size, args.epoch_num, args.critic_updates)
test.py
import numpy as np
from PIL import Image
from model import generator_model
from utils import load_images, deprocess_image
def test(test_path, result, model_path, pic_num):
    """Run the generator on test images and save side-by-side comparisons.

    :param test_path: directory with 'blur' and 'sharp' sub-folders
    :param result: output directory/prefix for the saved images
    :param model_path: path to the generator weights (.h5)
    :param pic_num: number of image pairs to load (also used as batch size)
    """
    # Load test data: x_test is blurred, y_test is sharp.
    data = load_images(test_path, pic_num)
    x_test, y_test = data['blur'], data['sharp']
    # Build the generator and load the trained weights.
    g = generator_model()
    g.load_weights(model_path)
    # Generate images and map everything back to uint8 pixel range.
    generated_images = g.predict(x=x_test, batch_size=pic_num)
    generated = np.array([deprocess_image(img) for img in generated_images])
    x_test = deprocess_image(x_test)
    y_test = deprocess_image(y_test)
    # Concatenate sharp / blurred / generated side by side and save each row.
    for i in range(generated_images.shape[0]):
        y = y_test[i, :, :, :]
        x = x_test[i, :, :, :]
        img = generated[i, :, :, :]
        output = np.concatenate((y, x, img), axis=1)
        # Bug fix: save the concatenated comparison image — previously only
        # `img` was saved and `output` was computed but never used.
        im = Image.fromarray(output.astype(np.uint8))
        im.save(result+'{}.jpg'.format(i))
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='')
    # Test data path, result path, generator weights, number of images.
    parser.add_argument('--test_path', type=str, default='data/1/')
    parser.add_argument('--result', type=str, default='data/1/')
    parser.add_argument('--model_path', type=str, default='save_model2/test1/generator_epo-80.h5')
    # Bug fix: type=int so a command-line override is numeric — the value is
    # passed to predict(batch_size=...) which requires an int.
    parser.add_argument('--pic_num', type=int, default=200, help='Number of images to process')
    args = parser.parse_args()
    test(args.test_path, args.result, args.model_path, args.pic_num)
utils.py
import os
from PIL import Image
import numpy as np
# Target (width, height) every image is resized to before training/testing.
RESHAPE = (256,256)
# Return True if *filename* has an image extension.
def is_an_image_file(filename):
    """Check the file name's suffix (case-insensitive) against known image types.

    Bug fix: the original used a substring test (`ext in filename`), which
    wrongly accepted names like 'photo.jpg.bak'; a suffix match is used
    instead, and the comparison is case-insensitive so '.PNG' also matches.
    """
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')
    return filename.lower().endswith(IMAGE_EXTENSIONS)
# Return full paths of all image files directly under *directory*.
def list_image_files(directory):
    """List image paths in deterministic (sorted) order.

    Bug fix: os.listdir order is arbitrary (filesystem-dependent), but
    load_images pairs the 'blur' and 'sharp' listings by position, so an
    unsorted listing could silently mismatch blur/sharp pairs. Sorting both
    listings makes the positional pairing reliable.
    """
    files = sorted(os.listdir(directory))
    return [os.path.join(directory, f) for f in files if is_an_image_file(f)]
# Read one image from disk with PIL.
def load_image(path):
    """Open the image file at *path* and return the PIL Image object."""
    return Image.open(path)
# Load up to pic_num paired (blur, sharp) images plus their file paths.
def load_images(path, pic_num):
    """Return a dict with preprocessed 'blur'/'sharp' arrays and their paths.

    :param path: directory containing 'blur' and 'sharp' sub-folders
    :param pic_num: maximum number of pairs to load; negative means load all

    NOTE(review): pairs are formed by zipping the two directory listings in
    order — this assumes both folders list matching filenames in the same
    order; verify against the dataset layout.
    """
    if pic_num < 0:
        pic_num = float("inf")
    # Collect every image path under each sub-folder.
    A_paths, B_paths = os.path.join(path, 'blur'), os.path.join(path, 'sharp')
    all_A_paths, all_B_paths = list_image_files(A_paths), list_image_files(B_paths)
    images_A, images_B = [], []
    images_A_paths, images_B_paths = [], []
    # Read and preprocess A/B images pairwise, keeping their paths.
    for path_A, path_B in zip(all_A_paths, all_B_paths):  # pair A with B positionally
        img_A, img_B = load_image(path_A), load_image(path_B)  # read one A/B pair
        images_A.append(preprocess_image(img_A))
        images_B.append(preprocess_image(img_B))
        images_A_paths.append(path_A)
        images_B_paths.append(path_B)
        # Stop once pic_num pairs have been collected.
        if len(images_A) > pic_num - 1:
            print(len(images_A),pic_num)
            break
    return {
        'blur': np.array(images_A),
        'blur_paths': np.array(images_A_paths),
        'sharp': np.array(images_B),
        'sharp_paths': np.array(images_B_paths)
    }
def preprocess_image(cv_img):
    """Resize a PIL image to RESHAPE and scale pixel values to [-1, 1]."""
    resized = cv_img.resize(RESHAPE)
    pixels = np.array(resized)
    return (pixels - 127.5) / 127.5
def deprocess_image(img):
    """Map a [-1, 1] float image back to uint8 pixel values in [0, 255]."""
    rescaled = 127.5 * img + 127.5
    return rescaled.astype('uint8')
def save_image(np_arr, path):
    """Denormalize a [-1, 1] array back to pixel range and write it to *path*."""
    pixels = np_arr * 127.5 + 127.5
    Image.fromarray(pixels).save(path)
model.py
from keras.layers import Input, Activation, Add, UpSampling2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.core import Dense, Flatten, Lambda
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from layer_utils import ReflectionPadding2D, res_block
import keras.backend as K
# Hyper-parameter from the paper: channel rate (chr).
channel_rate = 64
# Note the image_shape must be multiple of patch_shape
size = 256
image_shape = (size, size, 3)
patch_shape = (channel_rate, channel_rate, 3)
ngf = 64   # base number of generator filters
ndf = 64   # base number of discriminator filters
input_nc = 3   # input channels (RGB)
output_nc = 3  # output channels (RGB)
input_shape_generator = (size, size, input_nc)
input_shape_discriminator = (size, size, output_nc)
n_blocks_gen = 9  # number of residual blocks in the generator
# Generator
def generator_model():
    """Build the DeblurGAN generator.

    Architecture: reflection-padded 7x7 conv -> two strided downsampling
    convs -> nine residual blocks -> two upsampling stages -> 7x7 conv with
    tanh, plus a global skip connection from the input. Output has the same
    shape as the input image.
    """
    inputs = Input(shape=image_shape)
    # 1. Reflection-pad the input (3 pixels per side, 6 total per axis).
    # 2. 7x7 'valid' convolution, then BatchNorm and ReLU.
    x = ReflectionPadding2D((3, 3))(inputs)
    x = Conv2D(filters=ngf, kernel_size=(7, 7), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    # 3. Two downsampling steps; each halves the spatial size with a
    # strided 'same' conv + BN + ReLU, and doubles the channel count.
    n_downsampling = 2
    for i in range(n_downsampling):
        mult = 2**i
        x = Conv2D(filters=ngf*mult*2, kernel_size=(3, 3), strides=2, padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
    # 4. Nine residual blocks (pad, conv, BN, ReLU, pad, conv, BN;
    # dropout optional — enabled here).
    mult = 2**n_downsampling
    for i in range(n_blocks_gen):
        x = res_block(x, ngf*mult, use_dropout=True)
    # 5. Two upsampling stages back to the input resolution.
    for i in range(n_downsampling):
        mult = 2**(n_downsampling - i)
        # Variant 1: UpSampling2D + Conv2D instead of Conv2DTranspose,
        # to reduce checkerboard artifacts.
        x = UpSampling2D()(x)
        x = Conv2D(filters=int(ngf * mult / 2), kernel_size=(3, 3), padding='same')(x)
        # # Variant 2: plain transposed convolution.
        # x = Conv2DTranspose(filters=int(ngf * mult / 2), kernel_size=(3, 3), strides=2, padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
    # 6. Final stride-1 transposed conv (artifact smoothing), reflection
    # padding, 7x7 'valid' conv and tanh activation.
    x = Conv2DTranspose(filters=int(ngf * mult / 2), kernel_size=(3, 3), strides=1, padding='same')(x)
    x = ReflectionPadding2D((3, 3))(x)
    x = Conv2D(filters=output_nc, kernel_size=(7, 7), padding='valid')(x)
    x = Activation('tanh')(x)
    # 7. Global residual: add the input and halve, so the output keeps the
    # same size and value range as the input.
    outputs = Add()([x, inputs])
    outputs = Lambda(lambda z: z/2)(outputs)
    model = Model(inputs=inputs, outputs=outputs, name='Generator')
    return model
# Discriminator (critic)
def discriminator_model():
    """Build the DeblurGAN critic.

    Strided 4x4 convolutions with LeakyReLU/BatchNorm, followed by two
    Dense layers; the final sigmoid maps the score into (0, 1).
    """
    n_layers, use_sigmoid = 3, False
    inputs = Input(shape=input_shape_discriminator)
    # 1. Conv + LeakyReLU with negative-slope 0.2.
    x = Conv2D(filters=ndf, kernel_size=(4, 4), strides=2, padding='same')(inputs)
    x = LeakyReLU(0.2)(x)
    # 2. Four conv + BN + LeakyReLU groups: 3 strided, then 1 stride-1.
    nf_mult, nf_mult_prev = 1, 1
    for n in range(n_layers):
        nf_mult_prev, nf_mult = nf_mult, min(2**n, 8)
        x = Conv2D(filters=ndf*nf_mult, kernel_size=(4, 4), strides=2, padding='same')(x)
        x = BatchNormalization()(x)
        x = LeakyReLU(0.2)(x)
    nf_mult_prev, nf_mult = nf_mult, min(2**n_layers, 8)
    x = Conv2D(filters=ndf*nf_mult, kernel_size=(4, 4), strides=1, padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(0.2)(x)
    # 3. Optional sigmoid on the patch map (disabled here, WGAN-style).
    x = Conv2D(filters=1, kernel_size=(4, 4), strides=1, padding='same')(x)
    if use_sigmoid:
        x = Activation('sigmoid')(x)
    # 4. Flatten for the fully-connected head.
    # 5. Two Dense layers; final sigmoid limits the output to 0~1.
    x = Flatten()(x)
    x = Dense(1024, activation='tanh')(x)
    x = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=inputs, outputs=x, name='Discriminator')
    return model
def generator_containing_discriminator(generator, discriminator):
    """Chain generator -> discriminator; the single output is the critic score."""
    image_input = Input(shape=image_shape)
    critic_score = discriminator(generator(image_input))
    return Model(inputs=image_input, outputs=critic_score)
def generator_containing_discriminator_multiple_outputs(generator, discriminator):
    """Chain generator -> discriminator, exposing two outputs: the generated
    image (for the perceptual loss) and the critic score (for the
    adversarial loss)."""
    image_input = Input(shape=image_shape)
    generated = generator(image_input)
    critic_score = discriminator(generated)
    return Model(inputs=image_input, outputs=[generated, critic_score])
if __name__ == '__main__':
    # Print an architecture summary for each network.
    gen = generator_model()
    gen.summary()
    disc = discriminator_model()
    disc.summary()
    combined = generator_containing_discriminator(generator_model(), discriminator_model())
    combined.summary()
layer_utils.py
import tensorflow as tf
from keras.models import Model
from keras.engine import InputSpec
from keras.engine.topology import Layer
from keras.layers import Input, Conv2D, Activation, BatchNormalization
from keras.layers.merge import Add
from keras.utils import conv_utils
from keras.layers.core import Dropout
from keras.backend.common import normalize_data_format
# Residual block: pad, conv, BN, ReLU (optional dropout), pad, conv, BN,
# then add the block input back.
def res_block(input, filters, kernel_size=(3, 3), strides=(1, 1), use_dropout=False):
    """
    Build one Keras ResNet block with reflection padding.
    :param input: Input tensor
    :param filters: Number of filters for both convolutions
    :param kernel_size: Convolution kernel shape
    :param strides: Convolution strides
    :param use_dropout: Whether to apply Dropout(0.5) between the convs
    :return: Output tensor (input + residual branch)
    """
    out = ReflectionPadding2D((1, 1))(input)
    out = Conv2D(filters=filters, kernel_size=kernel_size, strides=strides)(out)
    out = BatchNormalization()(out)
    out = Activation('relu')(out)
    if use_dropout:
        out = Dropout(0.5)(out)
    out = ReflectionPadding2D((1, 1))(out)
    out = Conv2D(filters=filters, kernel_size=kernel_size, strides=strides)(out)
    out = BatchNormalization()(out)
    # Skip connection: element-wise sum of the block input and the branch.
    return Add()([input, out])
def spatial_reflection_2d_padding(x, padding=((1, 1), (1, 1)), data_format=None):
    """
    Reflection-pad the 2nd and 3rd dimensions of a 4D tensor.
    :param x: Input tensor
    :param padding: ((top, bottom), (left, right)) padding amounts
    :param data_format: 'channels_last' or 'channels_first'; defaults to the
        Keras backend's global image data format
    :return: Tensorflow tensor
    :raises ValueError: if data_format is not a known convention
    """
    assert len(padding) == 2
    assert len(padding[0]) == 2
    assert len(padding[1]) == 2
    if data_format is None:
        # Bug fix: `image_data_format` was called unqualified but never
        # imported in this module, raising NameError whenever data_format
        # was None; qualify it through the tf.keras backend instead.
        data_format = tf.keras.backend.image_data_format()
    if data_format not in {'channels_first', 'channels_last'}:
        raise ValueError('Unknown data_format ' + str(data_format))
    if data_format == 'channels_first':
        pattern = [[0, 0],
                   [0, 0],
                   list(padding[0]),
                   list(padding[1])]
    else:
        pattern = [[0, 0],
                   list(padding[0]), list(padding[1]),
                   [0, 0]]
    return tf.pad(x, pattern, "REFLECT")
# TODO: Credits
class ReflectionPadding2D(Layer):
    """Keras layer that pads the height/width of a 4D input by reflection.

    Unlike zero padding, border pixels are mirrored, which avoids injecting
    artificial black borders ahead of 'valid' convolutions. `padding` may be
    an int, a (height, width) pair, or ((top, bottom), (left, right)).
    """
    def __init__(self,
                 padding=(1, 1),
                 data_format=None,
                 **kwargs):
        super(ReflectionPadding2D, self).__init__(**kwargs)
        self.data_format = normalize_data_format(data_format)
        # Normalize `padding` into ((top, bottom), (left, right)) form.
        if isinstance(padding, int):
            self.padding = ((padding, padding), (padding, padding))
        elif hasattr(padding, '__len__'):
            if len(padding) != 2:
                raise ValueError('`padding` should have two elements. '
                                 'Found: ' + str(padding))
            height_padding = conv_utils.normalize_tuple(padding[0], 2,
                                                        '1st entry of padding')
            width_padding = conv_utils.normalize_tuple(padding[1], 2,
                                                       '2nd entry of padding')
            self.padding = (height_padding, width_padding)
        else:
            raise ValueError('`padding` should be either an int, '
                             'a tuple of 2 ints '
                             '(symmetric_height_pad, symmetric_width_pad), '
                             'or a tuple of 2 tuples of 2 ints '
                             '((top_pad, bottom_pad), (left_pad, right_pad)). '
                             'Found: ' + str(padding))
        # Only 4D (batch, spatial, spatial, channels) inputs are accepted.
        self.input_spec = InputSpec(ndim=4)

    def compute_output_shape(self, input_shape):
        """Grow the two spatial dims by the configured padding; unknown (None)
        dimensions stay None."""
        if self.data_format == 'channels_first':
            if input_shape[2] is not None:
                rows = input_shape[2] + self.padding[0][0] + self.padding[0][1]
            else:
                rows = None
            if input_shape[3] is not None:
                cols = input_shape[3] + self.padding[1][0] + self.padding[1][1]
            else:
                cols = None
            return (input_shape[0],
                    input_shape[1],
                    rows,
                    cols)
        elif self.data_format == 'channels_last':
            if input_shape[1] is not None:
                rows = input_shape[1] + self.padding[0][0] + self.padding[0][1]
            else:
                rows = None
            if input_shape[2] is not None:
                cols = input_shape[2] + self.padding[1][0] + self.padding[1][1]
            else:
                cols = None
            return (input_shape[0],
                    rows,
                    cols,
                    input_shape[3])

    def call(self, inputs):
        """Apply the reflection padding to the input tensor."""
        return spatial_reflection_2d_padding(inputs,
                                             padding=self.padding,
                                             data_format=self.data_format)

    def get_config(self):
        """Return the layer config so models using this layer can be re-loaded."""
        config = {'padding': self.padding,
                  'data_format': self.data_format}
        base_config = super(ReflectionPadding2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
if __name__ == "__main__":
    # Sanity check: pad a 256x256 RGB input by 3 on each side and show shapes.
    inp = Input(shape=(256, 256, 3))
    padded = ReflectionPadding2D(3)(inp)
    Model(inp, padded).summary()
losses.py
import keras.backend as K
from keras.applications.vgg16 import VGG16
from keras.models import Model
# Note the image_shape must be multiple of patch_shape
# Input shape fed to the VGG16 feature extractor in perceptual_loss.
image_shape = (256, 256, 3)
def l1_loss(y_true, y_pred):
    """Mean absolute error between prediction and target."""
    absolute_error = K.abs(y_pred - y_true)
    return K.mean(absolute_error)
def perceptual_loss_100(y_true, y_pred):
    """Perceptual loss scaled by a fixed factor of 100."""
    return perceptual_loss(y_true, y_pred) * 100
# Extract features of the generated and real images with VGG16 and compare
# the 'block3_conv3' activations; the loss is the mean squared feature difference.
def perceptual_loss(y_true, y_pred):
    """VGG16 feature-space MSE between y_true and y_pred.

    NOTE(review): VGG16 is instantiated inside the loss function; with
    graph-mode Keras this runs once at compile time, but confirm it is not
    rebuilt per step under eager execution.
    """
    vgg = VGG16(include_top=False, weights='imagenet', input_shape=image_shape)
    loss_model = Model(inputs=vgg.input, outputs=vgg.get_layer('block3_conv3').output)
    # NOTE(review): setting trainable after the model is built only matters
    # if this model were compiled/trained; here it documents intent.
    loss_model.trainable = False
    return K.mean(K.square(loss_model(y_true) - loss_model(y_pred)))
# Wasserstein (critic) loss applied to the combined model's output:
# the mean of the element-wise product of labels and predictions.
def wasserstein_loss(y_true, y_pred):
    """Wasserstein loss: mean(y_true * y_pred)."""
    product = y_true * y_pred
    return K.mean(product)