用VGG19做风格迁移的图像重绘

项目目标:
在这里插入图片描述
你会教计算机如何绘画。关键的想法是有一个神经网络从模型图像推断其绘画风格。然后将这种风格转移到另一张重新绘制的图片上。
具体代码实现如下:

import os
import sys
import numpy as np
import scipy.io
import scipy.misc
import tensorflow as tf  
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
%matplotlib inline
from __future__ import division

设置用于学习的风格图像和要重绘的内容图像的输入路径:

# Source images: the photograph to repaint and the painting whose style is borrowed.
CONTENT_IMAGE = 'data/Marilyn_Monroe_in_1952.jpg'
STYLE_IMAGE = 'data/StarryNight.jpg'
# Directory that receives the intermediate repainted images.
OUTPUT_DIR = 'output/'

设置生成图像的噪声比,以及重绘内容图像时内容损失和风格损失各自的权重。除此之外,还存储了预训练VGG模型的路径以及VGG预训练期间计算的平均值;这个平均值是已知的,需要先从输入图像中减去该均值,之后再输入到VGG模型:

# Height and width (in pixels) that input images are resized to.
IMAGE_HEIGHT, IMAGE_WIDTH = 300, 300
# Fraction of random noise blended into the starting image.
NOISE_RATIO = 0.6
# Weight of the content loss in the total loss.
BETA = 5
# Weight of the style loss in the total loss.
ALPHA = 100
# Path to the pre-trained 19-layer VGG weights.
VGG_MODEL = 'data/imagenet-vgg-verydeep-19.mat'
# Per-channel mean used when the VGG was trained. It is subtracted from
# every image fed to the VGG model; the (1, 1, 1, 3) shape lets it
# broadcast against a single-image batch with three colour channels.
MEAN_VALUES = np.array([[[[123.68, 116.779, 103.939]]]])

显示内容图片只是为了了解它是什么样子:

# Load the content image from disk and display it.
# scipy.misc.imread is used (the same reader this file uses for the style
# image) because `imageio` is never imported here and would raise NameError.
content_image = scipy.misc.imread(CONTENT_IMAGE)
imshow(content_image)
plt.show()

调整风格图像大小,显示它也是为了了解它是什么样子。请注意,内容图像和风格图像现在具有相同的尺寸和相同数量的颜色通道。

# Load the style image from disk.
style_image = scipy.misc.imread(STYLE_IMAGE)
# Get shape of target and make the style image the same
target_shape = content_image.shape
print('target_shape=', target_shape)
print('style_shape=', style_image.shape)
#ratio = target_shape[1] / style_image.shape[1]
#print "resize ratio=", ratio
# Resize the style image to the content image's shape.
style_image = scipy.misc.imresize(style_image, target_shape)
# NOTE(review): this writes the resized image back to STYLE_IMAGE, replacing
# the original file on disk -- confirm that overwriting is intended.
scipy.misc.imsave(STYLE_IMAGE, style_image)
imshow(style_image)
plt.show()

根据论文中的描述定义VGG模型。注意深度学习网络是相当复杂的,因为它将多个ConvNet层与ReLU激活函数和池化层结合在一起。另外值得注意的是,在风格迁移的论文中的许多实验表明,平均池化的表现要好于最大池化。因此,这里使用平均池化代替原始VGG中的最大池化:

def load_vgg_model(path, image_height, image_width, color_channels):
    """
    Build the convolutional part of VGG-19 as a TensorFlow graph.

    The pre-trained weights are read from the MATLAB file at `path` and
    embedded as constants; the only variable in the graph is the input
    image. Each convolution is followed by a ReLU, and every pooling stage
    is built as average pooling. No fully connected or softmax layer is
    constructed.

    Index of each convolution inside the stored layer array (the index
    right after each one holds its ReLU, which is applied here directly):
        conv1_1  0   conv1_2  2
        conv2_1  5   conv2_2  7
        conv3_1 10   conv3_2 12   conv3_3 14   conv3_4 16
        conv4_1 19   conv4_2 21   conv4_3 23   conv4_4 25
        conv5_1 28   conv5_2 30   conv5_3 32   conv5_4 34

    Args:
        path: location of the .mat file holding the VGG layers.
        image_height: height of the input image.
        image_width: width of the input image.
        color_channels: number of colour channels of the input image.

    Returns:
        A dict mapping 'input', 'conv1_1' ... 'conv5_4' and
        'avgpool1' ... 'avgpool5' to the corresponding graph nodes.
    """
    stored_layers = scipy.io.loadmat(path)['layers']

    def _weights(layer, expected_layer_name):
        """Return (kernel, bias) stored at index `layer`, checking its name."""
        record = stored_layers[0][layer][0][0]
        params = record[0][0]
        kernel, bias = params[0], params[1]
        assert record[-2] == expected_layer_name
        return kernel, bias

    def _conv2d_relu(prev_layer, layer, layer_name):
        """Convolve `prev_layer` with the stored filters, add the bias, apply ReLU."""
        kernel, bias = _weights(layer, layer_name)
        kernel = tf.constant(kernel)
        # The stored bias is flattened to one value per filter.
        bias = tf.constant(np.reshape(bias, (bias.size)))
        conv = tf.nn.conv2d(prev_layer, filter=kernel, strides=[1, 1, 1, 1], padding='SAME')
        return tf.nn.relu(conv + bias)

    def _avgpool(prev_layer):
        """Apply 2x2 average pooling with stride 2."""
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # (graph key, index of the convolution in the stored array);
    # an index of None marks an average-pooling stage.
    layout = (
        ('conv1_1', 0), ('conv1_2', 2), ('avgpool1', None),
        ('conv2_1', 5), ('conv2_2', 7), ('avgpool2', None),
        ('conv3_1', 10), ('conv3_2', 12), ('conv3_3', 14), ('conv3_4', 16), ('avgpool3', None),
        ('conv4_1', 19), ('conv4_2', 21), ('conv4_3', 23), ('conv4_4', 25), ('avgpool4', None),
        ('conv5_1', 28), ('conv5_2', 30), ('conv5_3', 32), ('conv5_4', 34), ('avgpool5', None),
    )

    graph = {
        'input': tf.Variable(np.zeros((1, image_height, image_width, color_channels)), dtype='float32'),
    }
    current = graph['input']
    for name, index in layout:
        if index is None:
            current = _avgpool(current)
        else:
            current = _conv2d_relu(current, index, name)
        graph[name] = current
    return graph

定义内容损失函数:

def content_loss_func(sess, model):
    """
    Build the content loss on the 'conv4_2' layer.

    The layer is evaluated once through `sess`; that numeric result is the
    fixed target. The returned tensor is the sum of squared differences
    between the live layer and the target, scaled by 1 / (4 * N * M).
    """
    layer = model['conv4_2']
    target = sess.run(layer)
    # N: number of filters of the layer.
    num_filters = target.shape[3]
    # M: height times width of the feature map.
    map_size = target.shape[1] * target.shape[2]
    squared_error = tf.reduce_sum(tf.pow(layer - target, 2))
    return (1 / (4 * num_filters * map_size)) * squared_error

定义哪些VGG层将被重新使用。如果想要更平滑的特征,需要增加更高层(conv5_1)的权重,并减少低层(conv1_1)的权重。如果想要提取更尖锐的特征,则需要反向操作:

# VGG layers used for the style loss, each paired with its weight.
# For smoother features, raise the weight of the higher layers (conv5_1)
# and lower the weight of the lower layers (conv1_1); do the opposite to
# extract sharper features.
STYLE_LAYERS = list(zip(
    ('conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'),
    (0.5, 1.0, 1.5, 3.0, 4.0),
))

定义风格损失函数:

def style_loss_func(sess, model):
    """
    Build the style loss over the layers listed in STYLE_LAYERS.

    Each listed layer is evaluated once through `sess` to obtain a fixed
    target. The per-layer loss is the squared difference between the Gram
    matrix of the live layer and that of the target, scaled by
    1 / (4 * N**2 * M**2). The result is the weighted sum of those losses.
    """
    def _gram(features, num_filters, map_size):
        """Gram matrix of `features` flattened to (map_size, num_filters)."""
        flat = tf.reshape(features, (map_size, num_filters))
        return tf.matmul(tf.transpose(flat), flat)

    def _layer_loss(layer):
        """Style loss of one layer against its currently evaluated value."""
        target = sess.run(layer)
        # N: number of filters of the layer.
        num_filters = target.shape[3]
        # M: height times width of the feature map.
        map_size = target.shape[1] * target.shape[2]
        target_gram = _gram(target, num_filters, map_size)
        live_gram = _gram(layer, num_filters, map_size)
        scale = 1 / (4 * num_filters**2 * map_size**2)
        return scale * tf.reduce_sum(tf.pow(live_gram - target_gram, 2))

    per_layer = [_layer_loss(model[name]) for name, _ in STYLE_LAYERS]
    weighted = [weight * loss for (_, weight), loss in zip(STYLE_LAYERS, per_layer)]
    return sum(weighted)

定义一个函数来生成噪声图像,并将其按给定的比例与内容图像混合。定义两个预处理和保存图像的辅助方法:

def generate_noise_image(content_image, noise_ratio=NOISE_RATIO):
    """
    Blend uniform random noise with the content image.

    Noise is drawn uniformly between -20 and 20 as float32, with a leading
    dimension of 1 followed by the first three dimensions of
    `content_image[0]`. The result is the weighted average
    noise * noise_ratio + content_image * (1 - noise_ratio).
    """
    single = content_image[0]
    noise_shape = (1, single.shape[0], single.shape[1], single.shape[2])
    noise = np.random.uniform(-20, 20, noise_shape).astype('float32')
    noise_part = noise * noise_ratio
    content_part = content_image * (1 - noise_ratio)
    return noise_part + content_part

# def process_image(image):
#     # Resize the image for convnet input, there is no change but just
#     # add an extra dimension.
#     image = np.reshape(image, ((1,) + image.shape))
#     # Input to the VGG model expects the mean to be subtracted.
#     # VGG模型的输入要求减去平均值
#     image = image - MEAN_VALUES
#     return image
def process_image(path):
    """
    Load the image at `path` and prepare it as input for the VGG model.

    The image is resized to (IMAGE_HEIGHT, IMAGE_WIDTH), given an extra
    leading dimension of size 1, and has MEAN_VALUES subtracted, because
    the VGG model expects mean-subtracted input.
    """
    pixels = scipy.misc.imresize(scipy.misc.imread(path), (IMAGE_HEIGHT, IMAGE_WIDTH))
    batch = pixels[np.newaxis, ...]
    return batch - MEAN_VALUES

def save_image(path, image):
    """
    Write a model-space image to `path`.

    MEAN_VALUES is added back, the leading dimension is dropped, and the
    values are clipped to [0, 255] and cast to uint8 before saving.
    """
    restored = (image + MEAN_VALUES)[0]
    pixels = np.clip(restored, 0, 255).astype('uint8')
    scipy.misc.imsave(path, pixels)
# Start a TensorFlow interactive session.
sess = tf.InteractiveSession()

# Load the preprocessed content image and display it.
content_image = process_image(CONTENT_IMAGE)
imshow(content_image[0])
plt.show()

# Load the preprocessed style image and display it.
style_image = process_image(STYLE_IMAGE)
imshow(style_image[0])
plt.show()

# Load the model, sized from the style image's dimensions, and print it.
model = load_vgg_model(VGG_MODEL, style_image[0].shape[0], style_image[0].shape[1], style_image[0].shape[2])
print(model)

# Generate the random-noise image that seeds the repainting.
input_image = generate_noise_image(content_image)
imshow(input_image[0])
plt.show()

# Initialize all variables in the session.
sess.run(tf.global_variables_initializer())

# Build content_loss and style_loss from their respective images.
# Construct content_loss using content_image
sess.run(model['input'].assign(content_image))
content_loss = content_loss_func(sess, model)

# Construct style_loss using style_image.
sess.run(model['input'].assign(style_image))
style_loss = style_loss_func(sess, model)

# The total loss is a weighted combination of the content and style losses.
# Instantiate equation 7 of the paper.
total_loss = BETA * content_loss + ALPHA * style_loss

# Build an optimizer that reduces the total loss; Adam is used here.
# From the paper: jointly minimize the distance of a white noise image
# from the content representation of the photograph in one layer of
# the network and the style representation of the painting in a number
# of layers of the CNN.
#
# The content is built from one layer, while the style is from five
# layers. Then we minimize the total_loss, which is the equation 7.
optimizer = tf.train.AdamOptimizer(2.0)
train_step = optimizer.minimize(total_loss)

# Number of iterations to run.
ITERATIONS = 100  # The art.py uses 5000 iterations, and yields far more appealing results. If you can wait, use 5000.

# Create the output directory once, before the loop, instead of checking
# for it on every report.
if not os.path.exists(OUTPUT_DIR):
    os.mkdir(OUTPUT_DIR)

# Run the model for a fixed number of iterations, saving intermediate
# repainted images. Variables are initialized again so that any variables
# created by optimizer.minimize are covered too; this resets the input,
# which is then seeded with the noise image.
sess.run(tf.global_variables_initializer())
sess.run(model['input'].assign(input_image))
print('ITERATIONS' + str(ITERATIONS))
for it in range(ITERATIONS):
    sess.run(train_step)
    print(it, '')
    if it % 10 == 0:
        # Report and save on every 10th iteration.
        mixed_image = sess.run(model['input'])
        print('Iteration %d' % it)
        # mixed_image is already a NumPy array; summing it with NumPy avoids
        # adding a new tf.reduce_sum op to the graph on every report.
        print('sum : ', np.sum(mixed_image))
        print('cost : ', sess.run(total_loss))

        # Build the path from OUTPUT_DIR so the directory created above and
        # the save location cannot drift apart.
        filename = os.path.join(OUTPUT_DIR, '%d.png' % it)
        save_image(filename, mixed_image)

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值