原理
结构示意图:
总损失=内容损失+风格损失
将图a的style loss 和 图p的content loss 组合起来,最小化total loss function求得x
$$L_{total}(\vec p,\vec a,\vec x)=\alpha L_{content}(\vec p,\vec x)+\beta L_{style}(\vec a,\vec x)$$
其中, α, β对应两个loss的权重,调节它们会得到不同的效果。
内容损失函数:
L_content 采用平方损失函数,为指定层特征图上每个元素的平方误差之和:
$$L_{content}(\vec p,\vec x,l)=\frac{1}{2}\sum_{i,j}\left(F_{ij}^{l}-P_{ij}^{l}\right)^{2}$$
损失函数的导数为:
$$\frac{\partial L_{content}}{\partial F_{ij}^{l}}=\begin{cases}\left(F^{l}-P^{l}\right)_{ij} & F_{ij}^{l}>0\\[4pt]0 & F_{ij}^{l}<0\end{cases}$$
风格损失函数:
风格损失函数理解上与内容损失函数相同,只是利用了不同层响应的组合表示。作者对每一层的响应建立了一个格拉姆(Gram)矩阵 G,用以表示特征之间的关联:
$$G_{ij}^{l}=\sum_{k}F_{ik}^{l}F_{jk}^{l}$$
l 层的损失为:
$$E_{l}=\frac{1}{4N_{l}^{2}M_{l}^{2}}\sum_{i,j}\left(G_{ij}^{l}-A_{ij}^{l}\right)^{2}$$
其中A为原始图像在l层的表示。
则风格损失函数的表示为:
$$L_{style}(\vec a,\vec x)=\sum_{l=0}^{L}w_{l}E_{l}$$
其中 $w_l$ 为每层损失的加权系数。
导数为:
$$\frac{\partial E_{l}}{\partial F_{ij}^{l}}=\begin{cases}\dfrac{1}{N_{l}^{2}M_{l}^{2}}\left((F^{l})^{T}\left(G^{l}-A^{l}\right)\right)_{ji} & F_{ij}^{l}>0\\[4pt]0 & F_{ij}^{l}<0\end{cases}$$
VGG19结构
实验部分
搭神经网络模型
StyleMigration_models
import typing
import tensorflow as tf
import StyleMigration_settings as settings
def get_vgg19_model(layers):
    """Build a frozen VGG19 feature extractor.

    :param layers: iterable of VGG19 layer names whose outputs to expose
    :return: a non-trainable tf.keras.Model mapping an image batch to the
        list of requested layer outputs
    """
    # VGG19 pretrained on ImageNet, without the classification head.
    backbone = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
    # Collect the output tensor of every requested layer, in order.
    wanted = [backbone.get_layer(name).output for name in layers]
    # Wire a new model from the original input to those outputs.
    extractor = tf.keras.Model([backbone.input, ], wanted)
    # Freeze all weights; the model is used purely for feature extraction.
    extractor.trainable = False
    return extractor
class NeuralStyleTransferModel(tf.keras.Model):
    """Wraps a frozen VGG19 to expose content and style features in one pass."""

    def __init__(self, content_layers: typing.Dict[str, float] = settings.CONTENT_LAYERS,
                 style_layers: typing.Dict[str, float] = settings.STYLE_LAYERS):
        super(NeuralStyleTransferModel, self).__init__()
        # Content feature layers: Dict[layer name, weight factor].
        self.content_layers = content_layers
        # Style feature layers: Dict[layer name, weight factor].
        self.style_layers = style_layers
        # All VGG layers we need — content layers first, then style layers.
        all_layers = list(self.content_layers.keys()) + list(self.style_layers.keys())
        # Map layer name -> position in the vgg model's output list.
        self.outputs_index_map = {name: idx for idx, name in enumerate(all_layers)}
        # Build the frozen VGG19 feature extractor.
        self.vgg = get_vgg19_model(all_layers)

    def call(self, inputs, training=None, mask=None):
        """Forward pass.

        :return: {'content': [(features, factor), ...],
                  'style':   [(features, factor), ...]}
        """
        outputs = self.vgg(inputs)

        def pick(layer_dict):
            # Pair each layer's features (batch dimension stripped) with its weight.
            return [(outputs[self.outputs_index_map[name]][0], factor)
                    for name, factor in layer_dict.items()]

        # Return the split outputs as a dict for convenient downstream use.
        return {'content': pick(self.content_layers),
                'style': pick(self.style_layers)}
一些设置类参数 ,主要是方便修改
StyleMigration_settings
# Content feature layers and their per-layer loss weights.
CONTENT_LAYERS = {'block4_conv2': 0.5, 'block5_conv2': 0.5}
# Style feature layers and their per-layer loss weights.
STYLE_LAYERS = {'block1_conv1': 0.2, 'block2_conv1': 0.2, 'block3_conv1': 0.2, 'block4_conv1': 0.2,
                'block5_conv1': 0.2}
# Path of the content image.
CONTENT_IMAGE_PATH = 'D:/Code/Python/image/she.jpg'
# Path of the style image.
STYLE_IMAGE_PATH = 'D:/Code/Python/image/start.jpg'
# Directory where generated images are saved.
# Fixed: the trainer uses this as a directory (os.mkdir + '<dir>/<epoch>.jpg'),
# so it must not carry a '.jpg' file extension.
OUTPUT_DIR = 'D:/Code/Python/image/output'
# Overall weight of the content loss.
CONTENT_LOSS_FACTOR = 1
# Overall weight of the style loss.
STYLE_LOSS_FACTOR = 100
# Image width in pixels.
WIDTH = 450
# Image height in pixels.
HEIGHT = 300
# Number of training epochs.
EPOCHS = 20
# Optimization steps per epoch.
STEPS_PER_EPOCH = 100
# Learning rate for the Adam optimizer.
LEARNING_RATE = 0.03
图像的一些预处理
StyleMigration_utils
import tensorflow as tf
import StyleMigration_settings as settings
# We use weights pretrained on ImageNet, so normalization must use the
# ImageNet per-channel mean and standard deviation.
image_mean = tf.constant([0.485, 0.456, 0.406])
# Fixed: the ImageNet std for the red channel is 0.229 (was typo'd as 0.299).
image_std = tf.constant([0.229, 0.224, 0.225])
def normalization(x):
    """Standardize image tensor *x* with the ImageNet channel statistics.

    :param x: image tensor with values scaled to [0, 1]
    :return: the channel-wise normalized tensor
    """
    centered = x - image_mean
    return centered / image_std
def load_images(image_path, width=settings.WIDTH, height=settings.HEIGHT):
    """Load a JPEG from disk and turn it into a normalized 4-D tensor.

    :param image_path: path of the image file
    :param width: target width in pixels
    :param height: target height in pixels
    :return: tensor of shape [1, height, width, 3]
    """
    # Read the raw file bytes and decode, forcing 3 channels.
    raw = tf.io.read_file(image_path)
    img = tf.image.decode_jpeg(raw, channels=3)
    # Resize to the target size.
    img = tf.image.resize(img, [height, width])
    # Scale to [0, 1], then apply ImageNet normalization.
    img = img / 255.
    img = normalization(img)
    # Add the leading batch dimension expected by the model.
    return tf.reshape(img, [1, height, width, 3])
def save_image(image, filename):
    """Denormalize a [1, H, W, 3] image tensor and write it to disk as JPEG.

    :param image: normalized image tensor with a leading batch dimension
    :param filename: destination file path
    """
    # Drop the batch dimension.
    img = tf.reshape(image, image.shape[1:])
    # Undo the ImageNet normalization, then rescale to [0, 255].
    img = img * image_std + image_mean
    img = img * 255.
    # Truncate to integers, clamp to the valid byte range, cast to uint8.
    img = tf.cast(img, tf.int32)
    img = tf.clip_by_value(img, 0, 255)
    img = tf.cast(img, tf.uint8)
    # Encode as JPEG and write out.
    tf.io.write_file(filename, tf.image.encode_jpeg(img))
主体部分
StyleMigration_train
# -*- coding: utf-8 -*-
# @File : train.py
# @Author : LiangJinPeng
# @Time : 2020/10/13
# @Desc :
import os
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from StyleMigration_models import NeuralStyleTransferModel
import StyleMigration_settings as settings
import StyleMigration_utils as utils
# Build the feature-extraction model (frozen VGG19).
model = NeuralStyleTransferModel()
# Load the content image.
content_image = utils.load_images(settings.CONTENT_IMAGE_PATH)
# Load the style image.
style_image = utils.load_images(settings.STYLE_IMAGE_PATH)
# Precompute the content features of the content image (optimization targets).
target_content_features = model([content_image, ])['content']
# Precompute the style features of the style image (optimization targets).
target_style_features = model([style_image, ])['style']
# NOTE(review): M and N here are fixed scaling constants for the loss terms.
# In the original paper M_l / N_l are the per-layer feature-map size and
# filter count; using pixel count and 3 only rescales the losses, which the
# CONTENT/STYLE_LOSS_FACTOR weights absorb — confirm this is intentional.
M = settings.WIDTH * settings.HEIGHT
N = 3
def _compute_content_loss(noise_features, target_features):
    """Content loss between two feature maps at a single layer.

    :param noise_features: features of the generated (noise) image
    :param target_features: features of the content image
    :return: scalar sum of squared differences scaled by 1 / (2*M*N)
    """
    squared_diff = tf.square(noise_features - target_features)
    # Normalize by the constant 2*M*N (mirrors the 1/2 factor in the paper).
    scale = 2. * M * N
    return tf.reduce_sum(squared_diff) / scale
def compute_content_loss(noise_content_features):
    """Weighted total content loss of the generated image.

    :param noise_content_features: content features of the noise image,
        as a list of (features, factor) pairs
    """
    # Per-layer content losses, each scaled by its layer weight.
    weighted_losses = [
        _compute_content_loss(noise_feat, target_feat) * factor
        for (noise_feat, factor), (target_feat, _)
        in zip(noise_content_features, target_content_features)
    ]
    return tf.reduce_sum(weighted_losses)
def gram_matrix(feature):
    """Gram matrix of an (H, W, C) feature map: channel-wise correlations.

    :param feature: feature tensor of shape (height, width, channels)
    :return: (channels, channels) Gram matrix
    """
    # Move the channel axis to the front, then flatten each channel map.
    channels_first = tf.transpose(feature, perm=[2, 0, 1])
    flat = tf.reshape(channels_first, (channels_first.shape[0], -1))
    # G = F F^T, where each row of F is one flattened channel.
    return flat @ tf.transpose(flat)
def _compute_style_loss(noise_feature, target_feature):
    """Style loss between two feature maps at a single layer.

    :param noise_feature: features of the generated (noise) image
    :param target_feature: features of the style image
    :return: scalar Gram-matrix distance scaled by 1 / (4*M^2*N^2)
    """
    gram_noise = gram_matrix(noise_feature)
    gram_target = gram_matrix(target_feature)
    diff = tf.reduce_sum(tf.square(gram_noise - gram_target))
    # Normalization constant 4*M^2*N^2 from the paper's E_l definition.
    scale = 4. * (M ** 2) * (N ** 2)
    return diff / scale
def compute_style_loss(noise_style_features):
    """Weighted total style loss of the generated image.

    :param noise_style_features: style features of the noise image,
        as a list of (features, factor) pairs
    """
    # Per-layer style losses, each scaled by its layer weight.
    weighted_losses = [
        _compute_style_loss(noise_feat, target_feat) * factor
        for (noise_feat, factor), (target_feat, _)
        in zip(noise_style_features, target_style_features)
    ]
    return tf.reduce_sum(weighted_losses)
def total_loss(noise_features):
    """Weighted sum of content and style losses for the generated image.

    :param noise_features: dict with 'content' and 'style' feature lists
    :return: scalar total loss
    """
    c_loss = compute_content_loss(noise_features['content'])
    s_loss = compute_style_loss(noise_features['style'])
    return (c_loss * settings.CONTENT_LOSS_FACTOR
            + s_loss * settings.STYLE_LOSS_FACTOR)
# Adam optimizer for updating the generated image.
optimizer = tf.keras.optimizers.Adam(settings.LEARNING_RATE)
# Initialize the generated image as the content image blended with uniform
# noise in [-0.2, 0.2); the division by 2 averages the two components.
noise_image = tf.Variable((content_image + np.random.uniform(-0.2, 0.2, (1, settings.HEIGHT, settings.WIDTH, 3))) / 2)
# Compile the training step with tf.function to speed up training.
@tf.function
def train_one_step():
    """Run one optimization step on the generated image and return the loss."""
    with tf.GradientTape() as tape:
        # Forward pass through the frozen VGG, then compute the total loss.
        outputs = model(noise_image)
        loss = total_loss(outputs)
    # Differentiate the loss w.r.t. the generated image only.
    gradient = tape.gradient(loss, noise_image)
    # One optimizer step applied directly to the image pixels.
    optimizer.apply_gradients([(gradient, noise_image)])
    return loss
# Ensure the output directory exists. os.makedirs with exist_ok=True creates
# missing parent directories and avoids the race/crash of the previous
# exists()-then-mkdir pattern.
os.makedirs(settings.OUTPUT_DIR, exist_ok=True)
# Train for settings.EPOCHS epochs.
for epoch in range(settings.EPOCHS):
    # tqdm progress bar for this epoch.
    with tqdm(total=settings.STEPS_PER_EPOCH, desc='Epoch {}/{}'.format(epoch + 1, settings.EPOCHS)) as pbar:
        # Run settings.STEPS_PER_EPOCH optimization steps per epoch.
        for step in range(settings.STEPS_PER_EPOCH):
            _loss = train_one_step()
            pbar.set_postfix({'loss': '%.4f' % float(_loss)})
            pbar.update(1)
    # Save a snapshot of the generated image after every epoch.
    utils.save_image(noise_image, '{}/{}.jpg'.format(settings.OUTPUT_DIR, epoch + 1))
跑完这份神经网络风格迁移的代码后,我的理解是:从一幅由内容图叠加随机噪声初始化的图像出发,把它与内容图像、风格图像的特征差异按给定比例组合成总损失函数,再通过梯度下降不断修改这幅噪声图像,最终得到风格迁移后的图像。
风格图像:
原图像
生成图像
小结:
在此实验中,感觉风格图像及原图像的占比是一个比较重要的参数,最后便是传统的超参数微调了。
参考
A Neural Algorithm of Artistic Style
深度学习实战(一)快速理解实现风格迁移
基于tensorflow的图像风格迁移原理与实现