# Tensorflow实现二次元图片的超分辨率

15 篇文章 3 订阅
10 篇文章 25 订阅

github上有一个很有意思的项目，waifu2x，原理是通过一个训练好的CNN，将低分辨率的图像放大2倍或更多，同时保留足够的细节，使放大后的图像不会过于模糊或失真。该项目是用 Lua + Torch 写的，最近在学习Tensorflow，闲暇之余打算在TF上自己实现一个这样的系统。

## 1. 网络选择

def srcnn_935(patches, name='srcnn'):
    """Build the SRCNN network with a 9-3-5 kernel layout.

    Args:
        patches: 4-D image tensor (batch, height, width, channels);
            low-resolution input patches.
        name: variable scope name for the network's weights.

    Returns:
        The reconstructed high-resolution patch tensor (no final
        activation — this is a regression output).
    """
    with tf.variable_scope(name):
        # SRCNN operates on an image already upscaled to the target size,
        # so bicubic-resize first (align_corners=True).
        upscaled_patches = tf.image.resize_bicubic(
            patches, [INPUT_SIZE, INPUT_SIZE], True)
        # Classic SRCNN: feature extraction (9x9) -> non-linear
        # mapping (3x3) -> reconstruction (5x5).
        conv1 = conv2d(upscaled_patches, 9, 9, 64, padding='VALID', name='conv1')
        relu1 = relu(conv1, name='relu1')
        conv2 = conv2d(relu1, 3, 3, 32, padding='VALID', name='conv2')
        relu2 = relu(conv2, name='relu2')
        return conv2d(relu2, 5, 5, NUM_CHENNELS, padding='VALID', name='conv3')

def vgg7(patches, name='vgg7'):
    """Build a 7-layer VGG-style super-resolution network (waifu2x style).

    Six 3x3 conv + leaky-ReLU stages with widths 32-32-64-64-128-128,
    followed by a linear 3x3 output conv. All convolutions use VALID
    padding, so the output is spatially smaller than the upscaled input.

    Args:
        patches: 4-D image tensor (batch, height, width, channels);
            low-resolution input patches.
        name: variable scope name for the network's weights.

    Returns:
        The reconstructed patch tensor with NUM_CHENNELS channels.
    """
    with tf.variable_scope(name):
        # Upscale first; the conv stack then refines the bicubic estimate.
        upscaled_patches = tf.image.resize_bicubic(
            patches, [INPUT_SIZE, INPUT_SIZE], True)
        conv1 = conv2d(upscaled_patches, 3, 3, 32, padding='VALID', name='conv1')
        lrelu1 = leaky_relu(conv1, name='leaky_relu1')
        conv2 = conv2d(lrelu1, 3, 3, 32, padding='VALID', name='conv2')
        lrelu2 = leaky_relu(conv2, name='leaky_relu2')
        conv3 = conv2d(lrelu2, 3, 3, 64, padding='VALID', name='conv3')
        lrelu3 = leaky_relu(conv3, name='leaky_relu3')
        conv4 = conv2d(lrelu3, 3, 3, 64, padding='VALID', name='conv4')
        lrelu4 = leaky_relu(conv4, name='leaky_relu4')
        conv5 = conv2d(lrelu4, 3, 3, 128, padding='VALID', name='conv5')
        lrelu5 = leaky_relu(conv5, name='leaky_relu5')
        conv6 = conv2d(lrelu5, 3, 3, 128, padding='VALID', name='conv6')
        lrelu6 = leaky_relu(conv6, name='leaky_relu6')
        # Linear output layer (no activation — regression).
        return conv2d(lrelu6, 3, 3, NUM_CHENNELS, padding='VALID', name='conv_out')

def vgg_deconv_7(patches, name='vgg_deconv_7'):
    """Build a VGG-style network that upscales at the END via deconvolution.

    Unlike srcnn_935/vgg7, the input is NOT pre-upscaled: six 3x3
    conv + leaky-ReLU stages (16-32-64-128-128-256) run at the low
    resolution, and a final stride-2 transposed convolution doubles the
    spatial size. Running the conv stack at low resolution is cheaper.

    Args:
        patches: 4-D image tensor (batch, height, width, channels) with a
            statically known shape (the deconv output shape is derived
            from it via get_shape()).
        name: variable scope name for the network's weights.

    Returns:
        The 2x-upscaled patch tensor with the input's channel count.
    """
    with tf.variable_scope(name):
        conv1 = conv2d(patches, 3, 3, 16, padding='VALID', name='conv1')
        lrelu1 = leaky_relu(conv1, name='leaky_relu1')
        conv2 = conv2d(lrelu1, 3, 3, 32, padding='VALID', name='conv2')
        lrelu2 = leaky_relu(conv2, name='leaky_relu2')
        conv3 = conv2d(lrelu2, 3, 3, 64, padding='VALID', name='conv3')
        lrelu3 = leaky_relu(conv3, name='leaky_relu3')
        conv4 = conv2d(lrelu3, 3, 3, 128, padding='VALID', name='conv4')
        lrelu4 = leaky_relu(conv4, name='leaky_relu4')
        conv5 = conv2d(lrelu4, 3, 3, 128, padding='VALID', name='conv5')
        lrelu5 = leaky_relu(conv5, name='leaky_relu5')
        conv6 = conv2d(lrelu5, 3, 3, 256, padding='VALID', name='conv6')
        lrelu6 = leaky_relu(conv6, name='leaky_relu6')

        # Derive the static output shape for the transposed convolution.
        batch_size = int(lrelu6.get_shape()[0])
        rows = int(lrelu6.get_shape()[1])
        cols = int(lrelu6.get_shape()[2])
        channels = int(patches.get_shape()[3])
        # To avoid checkerboard artifacts, the filter size (4) must be
        # divisible by the stride (2).
        return deconv2d(lrelu6, 4, 4, [batch_size, rows * 2, cols * 2, channels],
                        stride=(2, 2), name='deconv_out')

## 2. 数据准备

waifu2x的作者使用了3000幅无损原画作为训练数据，我没有那么多精力收集这样的数据，于是就在 Konachan 和 yande.re 上面找了100张图片，尽可能找分辨率高的，比如不低于1080p的图像，同时绝对不能要jpg格式或其他有损压缩格式的图像，有损压缩会引入噪声（jpeg artifacts）。（咳咳~这两个网站里面有一些图片很污，请控制好自己）

## 3. 数据增强

def batch_queue_for_training(data_path):
    """Build the TF1 queue-based input pipeline with data augmentation.

    Reads PNG patches from data_path, applies random brightness/contrast
    and random flips, center/random-crops the high-res label, produces a
    low-res input by a randomly chosen downscale method, optionally adds
    JPEG-compression and Gaussian noise, and batches the pairs.

    Args:
        data_path: directory containing *.png training patches of size
            PATCH_SIZE x PATCH_SIZE.

    Returns:
        (low_res_batch, high_res_batch): shuffled float32 batches of
        BATCH_SIZE (low-res inputs and matching high-res labels).
    """
    filename_queue = tf.train.string_input_producer(
        tf.train.match_filenames_once(join(data_path, '*.png')))
    # NOTE(review): `image_file` was referenced but never defined in the
    # original snippet; reading the queue with a WholeFileReader is the
    # standard TF1 way to obtain it — confirm against the full source.
    _, image_file = tf.WholeFileReader().read(filename_queue)
    patch = tf.image.decode_png(image_file, NUM_CHENNELS)
    # We must set the static shape of the image before making batches.
    patch.set_shape([PATCH_SIZE, PATCH_SIZE, NUM_CHENNELS])
    patch = tf.image.convert_image_dtype(patch, dtype=tf.float32)

    # Photometric augmentation (applied before downscaling so both the
    # input and the label see the same perturbation).
    if MAX_RANDOM_BRIGHTNESS > 0:
        patch = tf.image.random_brightness(patch, MAX_RANDOM_BRIGHTNESS)
    if len(RANDOM_CONTRAST_RANGE) == 2:
        patch = tf.image.random_contrast(patch, *RANDOM_CONTRAST_RANGE)
    patch = tf.image.random_flip_left_right(patch)
    high_res_patch = tf.image.random_flip_up_down(patch)

    # Random-crop the label down to LABEL_SIZE when there is room.
    crop_margin = PATCH_SIZE - LABEL_SIZE
    assert crop_margin >= 0
    if crop_margin > 1:
        high_res_patch = tf.random_crop(
            patch, [LABEL_SIZE, LABEL_SIZE, NUM_CHENNELS])

    # Pick one of three downscale methods at random so the network does
    # not overfit to a single resampling kernel.
    downscale_size = [INPUT_SIZE, INPUT_SIZE]
    resize_nn = lambda: tf.image.resize_nearest_neighbor(
        [high_res_patch], downscale_size, True)
    resize_area = lambda: tf.image.resize_area(
        [high_res_patch], downscale_size, True)
    resize_cubic = lambda: tf.image.resize_bicubic(
        [high_res_patch], downscale_size, True)
    r = tf.random_uniform([], 0, 3, dtype=tf.int32)
    low_res_patch = tf.case({tf.equal(r, 0): resize_nn,
                             tf.equal(r, 1): resize_area},
                            default=resize_cubic)[0]

    # Add JPEG compression noise to low_res_patch (encode/decode round
    # trip at a quality derived from the noise level).
    if JPEG_NOISE_LEVEL > 0:
        low_res_patch = tf.image.convert_image_dtype(
            low_res_patch, dtype=tf.uint8, saturate=True)
        jpeg_quality = 100 - 5 * JPEG_NOISE_LEVEL
        jpeg_code = tf.image.encode_jpeg(low_res_patch, quality=jpeg_quality)
        low_res_patch = tf.image.decode_jpeg(jpeg_code)
        low_res_patch = tf.image.convert_image_dtype(
            low_res_patch, dtype=tf.float32)

    # We must set the tensor's static shape before the following steps.
    low_res_patch.set_shape([INPUT_SIZE, INPUT_SIZE, NUM_CHENNELS])

    if GAUSSIAN_NOISE_STD > 0:
        low_res_patch += tf.random_normal(
            low_res_patch.get_shape(), stddev=GAUSSIAN_NOISE_STD)

    # Keep pixel values in the valid [0, 1] range after noise injection.
    low_res_patch = tf.clip_by_value(low_res_patch, 0, 1.0)
    high_res_patch = tf.clip_by_value(high_res_patch, 0, 1.0)

    # Generate shuffled batches.
    low_res_batch, high_res_batch = tf.train.shuffle_batch(
        [low_res_patch, high_res_patch],
        batch_size=BATCH_SIZE,
        capacity=MIN_QUEUE_EXAMPLES + 3 * BATCH_SIZE,
        min_after_dequeue=MIN_QUEUE_EXAMPLES)

    return low_res_batch, high_res_batch

## 4. 训练

loss函数使用均方误差（MSE），如果有较大噪声，可以考虑使用对噪声更鲁棒的Huber Loss。waifu2x在求均方误差时做了一些小修改，不同通道的误差乘以了不同的权值，BGR对应的权重分别为0.11448, 0.58661, 0.29891，这三个值来自彩色图像到灰度图像的转换公式，由于人眼对绿色是最为敏感的，因此G分量的权值最高。这样做能使重建后的图像在颜色上更友好，不会出现偏色。

def loss(inferences, ground_truthes, weights_decay=0, name='loss'):
    """Channel-weighted MSE loss with optional L2 weight decay.

    The ground truth is center-cropped to the (smaller, because of VALID
    convolutions) spatial size of the inference before the difference is
    taken. Per-channel weights come from the color-to-gray conversion
    formula, emphasizing G (the eye is most sensitive to green).

    Args:
        inferences: network output, 4-D tensor (batch, h, w, 3) in BGR
            channel order (weights below are B, G, R).
        ground_truthes: label patches, 4-D tensor with spatial size >=
            the inference's.
        weights_decay: if > 0, adds weights_decay * sum of L2 norms of
            all tensors in the 'weights' collection.
        name: name scope for the loss ops.

    Returns:
        Scalar loss tensor.
    """
    with tf.name_scope(name):
        # Center-crop the label to match the inference's spatial extent.
        slice_begin = (int(ground_truthes.get_shape()[1])
                       - int(inferences.get_shape()[1])) // 2
        slice_end = int(inferences.get_shape()[1]) + slice_begin
        delta = inferences - ground_truthes[
            :, slice_begin: slice_end, slice_begin: slice_end, :]

        # Weights of B, G and R, from the gray-conversion coefficients.
        delta *= [[[[0.11448, 0.58661, 0.29891]]]]
        l2_loss = tf.pow(delta, 2)
        mse_loss = tf.reduce_mean(tf.reduce_sum(l2_loss, axis=[1, 2, 3]))

        if weights_decay > 0:
            weights = tf.get_collection('weights')
            # tf.pack is the pre-1.0 name of tf.stack; kept to match the
            # TF version this file targets.
            reg_loss = weights_decay * tf.reduce_sum(
                tf.pack([tf.nn.l2_loss(i) for i in weights]),
                name='regularization_loss')
            return mse_loss + reg_loss
        else:
            return mse_loss

waifu2x训练了2000个epoch，花费了12小时。本人计算机算力有限，数据有限，所以只训练了100个epoch，但是也达到了还不错的结果。

## 5. 使用

03-18
01-23
11-07 1万+
06-24 849
05-31
10-09 1274
06-06 121
05-08 5366
03-16 58

### “相关推荐”对你有帮助么？

• 非常没帮助
• 没帮助
• 一般
• 有帮助
• 非常有帮助

aipiano

¥2 ¥4 ¥6 ¥10 ¥20

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、C币套餐、付费专栏及课程。