关于神经风格迁移的基本理解-使用tensorflow进行神经风格迁移

最新推荐文章于 2023-01-09 08:19:12 发布

Sun_runer

最新推荐文章于 2023-01-09 08:19:12 发布

阅读量803

点赞数

文章标签： python 深度学习人工智能机器学习

本文链接：https://blog.csdn.net/Sun_runer/article/details/104518446

版权

神经风格迁移
我们所有人都使用过在智能手机中的相机滤镜，或者有时我们安装了不同的滤镜应用程序，以使照片看起来更具光泽感和艺术感。但是，您是否想知道这些过滤器实际上是如何工作的？这种艺术外观如何出现？

所有这些都是由于智能的算法。
现在，什么是风格迁移？
风格迁移是一种以另一幅图像的样式重建图像的技术。
关于这个主题已经有很多文章，它们介绍了如何使用神经网络来生成艺术风格的图像；每位作者都阐述了各自的创新方法，即通过不同的优化方法和损失函数来构建用于生成艺术图像的神经网络。
在研究了有关神经风格迁移的各种工作以及它们的构建方式之后，我在本博客中一步步地解释如何实际构建一个基本的神经风格迁移模型。

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np
import os
from keras import backend as K
from keras.preprocessing.image import load_img, save_img, img_to_array
import matplotlib.pyplot as plt
from keras.applications import vgg19
from keras.models import Model
#from keras import optimizers
from scipy.optimize import fmin_l_bfgs_b
#from keras.applications.vgg19 import VGG19
#vgg19_weights = '../input/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5'
#vgg19 = VGG19(include_top = False, weights=vgg19_weights)
# Show which entries are available under the ../input directory.
input_entries = os.listdir("../input")
print(input_entries)

指定输入图片路径

# Directories that hold the content photo and the style artwork.
ContentPath = '../input/image-classification/validation/validation/travel and adventure/'
StylePath = '../input/best-artworks-of-all-time/images/images/'
style_image_path = ''.join([StylePath, 'Pablo_Picasso/Pablo_Picasso_92.jpg'])
base_image_path = ''.join([ContentPath, '13.jpg'])

# Dimensions of the generated picture: the number of rows is fixed at 400
# and the number of columns is scaled so the content image keeps its
# width/height ratio.
img_nrows = 400
width, height = load_img(base_image_path).size
img_ncols = int(width * img_nrows / height)

接下来的函数通过vgg19模型对图像进行预处理

def preprocess_image(image_path):
    """Load an image file and convert it into a VGG19-ready batch of one.

    The image is resized to ``(img_nrows, img_ncols)`` while loading,
    converted to an array, given a leading batch axis and finally passed
    through ``vgg19.preprocess_input``.
    """
    from keras.applications import vgg19
    pixels = img_to_array(
        load_img(image_path, target_size=(img_nrows, img_ncols))
    )
    batch = np.expand_dims(pixels, axis=0)
    return vgg19.preprocess_input(batch)

内容图片和风格图片的展示

在这里插入图片描述

图像的张量表示

# Wrap the preprocessed content and style images as backend variables
# (content first, then style).
base_image, style_reference_image = (
    K.variable(preprocess_image(path))
    for path in (base_image_path, style_image_path)
)

生成图片的初始化

# Placeholder for the generated image; the position of the channel axis
# follows the backend's image data format.
channels_first = K.image_data_format() == 'channels_first'
combination_shape = (
    (1, 3, img_nrows, img_ncols) if channels_first
    else (1, img_nrows, img_ncols, 3)
)
combination_image = K.placeholder(combination_shape)

将三张图片合成一个keras张量

# Stack the three images along the batch axis:
# index 0 = content, index 1 = style, index 2 = generated image.
batch_members = [base_image, style_reference_image, combination_image]
input_tensor = K.concatenate(batch_members, axis=0)

构建VGG19模型

# Build the VGG19 network on top of the 3-image batch. The weights come
# from a local .h5 file and the top (classifier) layers are left out.
from keras.applications.vgg19 import VGG19

vgg19_weights = '../input/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5'
vgg_options = dict(
    input_tensor=input_tensor,
    include_top=False,
    weights=vgg19_weights,
)
model = VGG19(**vgg_options)
# Alternative noted by the original author: pass weights='imagenet'
# instead of the local weights file.
print('Model loaded.')

我们只用VGG19中的中间层

# Layer whose feature maps are used for the content representation.
content_layers = ['block5_conv2']

# Layers whose feature maps are used for the style representation
# (the first convolution of each of the five blocks).
style_layers = [
    'block1_conv1',
    'block2_conv1',
    'block3_conv1',
    'block4_conv1',
    'block5_conv1',
]

num_content_layers, num_style_layers = len(content_layers), len(style_layers)

# Map every layer name to its symbolic output tensor.
# Per the original note, for the sample image 'block5_conv2' yields three
# feature maps of shape (25, 33, 512), one per image in the batch.
outputs_dict = {layer.name: layer.output for layer in model.layers}
print(outputs_dict['block5_conv2'])

内容损失

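给定选定的输出层 l，内容损失定义为内容图像 C 的特征图 F 与生成图像 Y 的特征图 P 之间的平方误差；下面的实现对所有元素的平方差求和（没有除以元素个数取平均）：$L_{content} = \sum_{i,j}\left(P^{l}_{ij} - F^{l}_{ij}\right)^{2}$。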

def get_content_loss(base_content, target):
    """Return the sum of squared differences between two feature tensors.

    Auxiliary loss meant to keep the "content" of the base image in the
    generated image: ``base_content`` holds the base image's features and
    ``target`` the generated image's features.
    """
    difference = target - base_content
    return K.sum(K.square(difference))

风格损失函数

首先，我们需要为风格层输出的张量计算Gram-matrix（包含相关特征的矩阵）。 Gram矩阵本质上只是风格层的特征激活向量的点积矩阵。

如果Gram矩阵中某个元素的值接近零，则意味着在给定风格图像的给定层中，对应的两个特征不会同时激活。反之，如果Gram矩阵中某个元素的值较大，则意味着这两个特征在给定风格图像上确实会同时激活。然后，我们将尝试创建一个混合图像，使其复现风格图像的激活模式。

import tensorflow as tf
def gram_matrix(input_tensor):
    """Return the un-normalised Gram matrix of a rank-3 feature tensor.

    The last axis is treated as the channel axis. The tensor is flattened
    to ``(positions, channels)`` and multiplied with its own transpose,
    which gives a ``(channels, channels)`` matrix of channel-wise inner
    products (a feature-wise outer product). The result is not divided by
    the number of positions.

    Raises:
        AssertionError: if ``input_tensor`` does not have exactly 3 axes.
    """
    assert K.ndim(input_tensor) == 3
    channels = int(input_tensor.shape[-1])
    flat_features = tf.reshape(input_tensor, [-1, channels])
    # transpose_a=True computes flat_features^T @ flat_features.
    return tf.matmul(flat_features, flat_features, transpose_a=True)

def get_style_loss(style, combination):
    """Return the summed squared difference between two Gram matrices.

    Args:
        style: rank-3 feature tensor of the style reference image.
        combination: rank-3 feature tensor of the generated image.

    Returns:
        A scalar tensor: the sum over all entries of the squared difference
        between ``gram_matrix(style)`` and ``gram_matrix(combination)``.
        No normalisation by channel count or image size is applied.

    Raises:
        AssertionError: if either input does not have exactly 3 axes.
    """
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    style_gram = gram_matrix(style)
    combination_gram = gram_matrix(combination)
    return K.sum(K.square(style_gram - combination_gram))

计算总的损失函数

content_weight = 0.025
style_weight = 1.0

# Combine the loss terms into a single scalar. The total starts as a
# backend scalar so that tensor-valued terms can be accumulated onto it.
loss = K.variable(0.0)

# Content term: compare the content image (batch index 0) with the
# generated image (batch index 2) on every configured content layer.
for layer_name in content_layers:
    layer_features = outputs_dict[layer_name]
    base_image_features = layer_features[0, :, :, :]
    combination_features = layer_features[2, :, :, :]
    print('Layer Feature for Content Layers :: '+str(layer_features))
    print('Base Image Feature :: '+str(base_image_features))
    print('Combination Image Feature for Content Layers:: '+str(combination_features)+'\n')
    loss += (content_weight / num_content_layers) * get_content_loss(
        base_image_features, combination_features)

# Style term: compare the style image (batch index 1) with the generated
# image (batch index 2) on every configured style layer, each layer
# contributing an equal share of style_weight.
feature_layers = style_layers  # name kept for any later cells that use it
for layer_name in feature_layers:
    layer_features = outputs_dict[layer_name]
    style_reference_features = layer_features[1, :, :, :]
    combination_features = layer_features[2, :, :, :]
    print('Layer Feature for Style Layers :: '+str(layer_features))
    print('Style Image Feature :: '+str(style_reference_features))
    print('Combination Image Feature for Style Layers:: '+str(combination_features)+'\n')
    sl = get_style_loss(style_reference_features, combination_features)
    loss += (style_weight / num_style_layers) * sl

###此deprocess_image函数用于返回转换后的最终图像的原始格式，可以由Matplotlib轻松读取和显示。

def deprocess_image(x):
    """Convert an optimised image array back into a displayable RGB image.

    Args:
        x: NumPy array holding ``img_nrows * img_ncols * 3`` values, laid
            out according to the backend's image data format.

    Returns:
        A ``uint8`` array of shape ``(img_nrows, img_ncols, 3)`` with
        values clipped to ``[0, 255]``. The input array is left unmodified.
    """
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, img_nrows, img_ncols)).transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))
    # reshape/transpose return views of the caller's buffer, so take a copy
    # before the in-place additions below.
    x = x.copy()
    # Remove zero-center by mean pixel
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    # 'BGR'->'RGB'
    x = x[:, :, ::-1]
    return np.clip(x, 0, 255).astype('uint8')

关于loss的梯度计算。

# Gradients of the loss with respect to the generated image.
grads = K.gradients(loss, combination_image)
grads

# The backend function returns the loss followed by the gradient(s);
# a list/tuple of gradients is unpacked into separate outputs.
if isinstance(grads, (list, tuple)):
    outputs = [loss, *grads]
else:
    outputs = [loss, grads]
f_outputs = K.function([combination_image], outputs)
f_outputs

尽管有各种优化器，但在这种情况下我们使用了L-BFGS优化器

# Run scipy-based optimisation (L-BFGS) over the pixels of the generated
# image so as to minimise the neural style loss. The search starts from the
# preprocessed content image.
x_opt = preprocess_image(base_image_path)


def eval_loss_and_grads(x):
    """Evaluate the loss and its gradient for a flat image vector.

    ``x`` is reshaped into a one-image batch matching the backend's image
    data format and fed to ``f_outputs``. Returns ``(loss_value,
    grad_values)``, where ``grad_values`` is flattened and cast to float64.
    """
    channels_first = K.image_data_format() == 'channels_first'
    batch_shape = (
        (1, 3, img_nrows, img_ncols) if channels_first
        else (1, img_nrows, img_ncols, 3)
    )
    outs = f_outputs([x.reshape(batch_shape)])
    loss_value, grad_outs = outs[0], outs[1:]
    if len(grad_outs) == 1:
        grad_values = grad_outs[0].flatten().astype('float64')
    else:
        grad_values = np.array(grad_outs).flatten().astype('float64')
    return loss_value, grad_values

该Evaluator类的作用是：在一次计算中同时得到损失和梯度并缓存起来，再通过 loss 和 grads 两个方法分别提供给 L-BFGS 优化器。优化器需要的是可调用的函数；如果直接把数组传给它，就会出现 “'numpy.ndarray' object is not callable” 错误，使用该类可以避免这一问题。

class Evaluator(object):
    """Serve loss and gradient to an optimiser from a single evaluation.

    ``loss(x)`` calls ``eval_loss_and_grads`` once and caches both results;
    the following ``grads(x)`` call returns the cached gradient and clears
    the cache. The two methods must therefore be called alternately,
    ``loss`` first.
    """

    def __init__(self):
        # Cache for the most recent evaluation; both are None when empty.
        self.loss_value = None
        self.grad_values = None

    def loss(self, x):
        """Evaluate at ``x``, cache loss and gradient, return the loss."""
        # A pending cached loss means grads() was not called in between.
        assert self.loss_value is None
        loss_value, grad_values = eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        """Return a copy of the cached gradient and reset the cache.

        ``x`` is ignored; the gradient comes from the preceding ``loss``
        call.
        """
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values

evaluator = Evaluator()

iterations = 400
# Keep track of the lowest loss seen and the image that produced it.
best_loss, best_img = float('inf'), None
for i in range(iterations):
    print('Start of iteration', i)
    # Each outer iteration lets L-BFGS use at most 20 function evaluations,
    # continuing from the previous result.
    result = fmin_l_bfgs_b(
        evaluator.loss,
        x_opt.flatten(),
        fprime=evaluator.grads,
        maxfun=20,
        disp=True,
    )
    x_opt, min_val, info = result
    print('Current loss value:', min_val)
    if min_val < best_loss:
        best_loss, best_img = min_val, x_opt.copy()

最终的图片以及迁移后的效果

# Convert the best image found back to displayable form and show it.
best_copy = best_img.copy()
imgx = deprocess_image(best_copy)
plt.imshow(imgx)

在这里插入图片描述

# Show the content image, the style image and the result side by side in
# the first three cells of a 5x5 subplot grid.
plt.figure(figsize=(30, 30))

img_base = load_img(base_image_path)
img_style = load_img(style_image_path)
panels = (
    ("Base Image", img_base),
    ("Style Image", img_style),
    ("Final Image", imgx),
)
for slot, (caption, picture) in enumerate(panels, start=1):
    plt.subplot(5, 5, slot)
    plt.title(caption, fontsize=20)
    plt.imshow(picture)

在这里插入图片描述

Sun_runer

关注

0
点赞
踩
4

收藏

觉得还不错? 一键收藏
0
评论
关于神经风格迁移的基本理解-使用tensorflow进行神经风格迁移

我们所有人都使用过在智能手机中的相机滤镜，或者有时我们安装了不同的滤镜应用程序，以使照片看起来更具光泽感和艺术感。但是，您是否想知道这些过滤器实际上是如何工作的？这种艺术外观如何出现？所有这些都是由于智能的算法。现在，什么是风格迁移？风格迁移是一种以另一幅图像的样式重建图像的技术。关于这个主题的文章有很多文章，其中介绍了如何使用神经网络来生成艺术风格的图像，每位作者都阐述了他们自己的...
复制链接

扫一扫