实验目的
- 掌握如何使用 TensorFlow 实现风格迁移算法的训练
- 掌握使用Tensorflow定义损失函数的方法
- 掌握使用Tensorflow存储网络模型的方法
- 以实时风格迁移为例,掌握使用 TensorFlow 进行神经网络训练的方法(推荐使用 GPU 进行训练)
实验代码
- transform.py:定义基本运算单元
#encoding=utf-8
import tensorflow as tf, pdb
# Standard deviation of the truncated-normal initializer used for every
# convolution / transposed-convolution kernel (see _conv_init_vars).
WEIGHTS_INIT_STDEV = .1
def net(image, type=0):
    """Build the image transformation network and return its output.

    The network is three convolutions, five residual blocks, two transposed
    convolutions and a final 9x9 convolution followed by a scaled tanh.

    Args:
        image: 4-D input tensor. The layer helpers read its static shape, so
            it is assumed to be NHWC with fully defined dimensions
            (TODO confirm against the caller).
        type: normalization selector forwarded to the layers; 0 selects batch
            normalization and 1 selects instance normalization.

    Returns:
        Tensor ``preds`` whose values lie in [0, 255].
    """
    # `type` must be passed by keyword: the fifth positional parameter of
    # _conv_layer is `relu`, so a positional `type` silently toggled the
    # activation (type=0 disabled ReLU) instead of choosing the normalization.
    conv1 = _conv_layer(image, 32, 9, 1, type=type)
    conv2 = _conv_layer(conv1, 64, 3, 2, type=type)
    conv3 = _conv_layer(conv2, 128, 3, 2, type=type)

    # The residual blocks are pinned to type=1 exactly as in the original code.
    residual1 = _residual_block(conv3, 3, type=1)
    residual2 = _residual_block(residual1, 3, type=1)
    residual3 = _residual_block(residual2, 3, type=1)
    residual4 = _residual_block(residual3, 3, type=1)
    residual5 = _residual_block(residual4, 3, type=1)

    conv_transpose1 = _conv_tranpose_layer(residual5, 64, 3, 2, type=type)
    conv_transpose2 = _conv_tranpose_layer(conv_transpose1, 32, 3, 2, type=type)

    # No ReLU on the last convolution: tanh of a non-negative input would
    # restrict the output to [127.5, 255] instead of the full [0, 255] range.
    conv4 = _conv_layer(conv_transpose2, 3, 9, 1, relu=False, type=type)

    # tanh maps to [-1, 1]; shift and scale it to the pixel range [0, 255].
    preds = (tf.nn.tanh(conv4) + 1) * 255. / 2
    return preds
def _conv_layer(net, num_filters, filter_size, strides, relu=True, type=0):
    """Convolve `net`, add a bias, then optionally normalize and apply ReLU.

    Args:
        net: input tensor of the layer.
        num_filters: number of output channels.
        filter_size: side length of the square kernel.
        strides: scalar stride used for both spatial dimensions.
        relu: when truthy, ReLU is applied to the (normalized) result.
        type: 0 applies _batch_norm, 1 applies _instance_norm, any other
            value skips normalization.

    Returns:
        The output tensor of the layer.
    """
    # Kernel variable of shape [k, k, c_in, c_out].
    kernel = _conv_init_vars(net, num_filters, filter_size)

    # conv2d expects one stride per dimension of the 4-D input.
    stride_spec = [1, strides, strides, 1]

    conv_out = tf.nn.conv2d(net, kernel, strides=stride_spec, padding='SAME')
    bias = tf.Variable(tf.zeros([num_filters], dtype=tf.float32))
    out = conv_out + bias

    if type == 0:
        out = _batch_norm(out)
    elif type == 1:
        out = _instance_norm(out)

    return tf.nn.relu(out) if relu else out
def _conv_tranpose_layer(net, num_filters, filter_size, strides, type=0):
    """Transposed convolution + bias, followed by normalization and ReLU.

    With 'SAME' padding the two middle dimensions of the output are the
    corresponding input dimensions multiplied by `strides`.

    Args:
        net: input tensor; its static shape must be fully defined because it
            is used to build the output shape.
        num_filters: number of output channels.
        filter_size: side length of the square kernel.
        strides: scalar stride (upsampling factor) for both spatial dimensions.
        type: 0 applies _batch_norm, 1 applies _instance_norm, any other
            value skips normalization.

    Returns:
        The output tensor of the layer (ReLU is always applied).
    """
    # Kernel variable of shape [k, k, num_filters, c_in].
    kernel = _conv_init_vars(net, num_filters, filter_size, transpose=True)

    in_shape = [dim.value for dim in net.get_shape()]
    batch, rows, cols = in_shape[0], in_shape[1], in_shape[2]

    stride_spec = [1, strides, strides, 1]
    # Output shape for padding='SAME'.
    target_shape = tf.stack([batch, rows * strides, cols * strides, num_filters])

    deconv = tf.nn.conv2d_transpose(net, kernel, target_shape, stride_spec,
                                    padding='SAME')
    bias = tf.Variable(tf.zeros([num_filters], dtype=tf.float32))
    out = deconv + bias

    if type == 0:
        out = _batch_norm(out)
    elif type == 1:
        out = _instance_norm(out)

    return tf.nn.relu(out)
def _residual_block(net, filter_size=3, type=0):
    """Residual block: relu(x + F(x)) with F = conv -> ReLU -> conv.

    Both convolutions use 128 filters and stride 1, so the block does not
    change the shape of its input (the input is therefore expected to have
    128 channels already).

    Args:
        net: input tensor of the block.
        filter_size: side length of the square kernels.
        type: normalization selector forwarded to both convolutions
            (0 = batch norm, 1 = instance norm).

    Returns:
        The output tensor of the block.
    """
    x_shape = net.get_shape()

    # Forward `type` so the caller's normalization choice is actually honored.
    tmp = _conv_layer(net, 128, filter_size, 1, relu=True, type=type)
    # The second convolution has no activation of its own; the single ReLU is
    # applied after the skip connection has been added.
    fx = _conv_layer(tmp, 128, filter_size, 1, relu=False, type=type)
    fx_shape = fx.get_shape()

    # The skip connection needs identical spatial size. %s is used because a
    # dimension value may be None, which %d cannot format.
    assert x_shape[1] == fx_shape[1] and x_shape[2] == fx_shape[2], \
        "x_shape[1] = %s, fx_shape[1] = %s" % (x_shape[1].value, fx_shape[1].value)

    return tf.nn.relu(net + fx)
def _batch_norm(net, train=True):
    """Normalize `net` with statistics taken over all axes but the last.

    A learnable per-channel shift (initialized to 0) and scale (initialized
    to 1) are created for every call. `train` is accepted but not used.

    Args:
        net: 4-D input tensor with a statically known channel count.
        train: unused.

    Returns:
        The normalized tensor.
    """
    static_shape = net.get_shape()
    channels = [dim.value for dim in static_shape][3]

    # Every axis except the trailing channel axis.
    reduce_axes = list(range(len(static_shape) - 1))
    mean, variance = tf.nn.moments(net, reduce_axes, keep_dims=True)

    # Learnable affine parameters.
    shift = tf.Variable(tf.zeros([channels]))
    scale = tf.Variable(tf.ones([channels]))

    return tf.nn.batch_normalization(net, mean, variance, shift, scale, 1e-3)
def _instance_norm(net, train=True):
    """Normalize `net` with statistics taken over axes 1 and 2 only.

    A learnable per-channel shift (initialized to 0) and scale (initialized
    to 1) are created for every call. `train` is accepted but not used.

    Args:
        net: 4-D input tensor with a statically known channel count.
        train: unused.

    Returns:
        ``scale * normalized + shift``.
    """
    channels = [dim.value for dim in net.get_shape()][3]

    mean, variance = tf.nn.moments(net, [1, 2], keep_dims=True)

    # Learnable affine parameters.
    shift = tf.Variable(tf.zeros([channels]))
    scale = tf.Variable(tf.ones([channels]))

    stddev = (variance + 1e-3) ** (.5)
    normalized = (net - mean) / stddev
    return scale * normalized + shift
def _conv_init_vars(net, out_channels, filter_size, transpose=False):
    """Create the kernel variable for a (transposed) convolution on `net`.

    Args:
        net: 4-D input tensor of the layer; its last static dimension gives
            the number of input channels.
        out_channels: number of output channels of the layer.
        filter_size: side length of the square kernel.
        transpose: when true, the last two kernel dimensions are swapped, as
            required by conv2d_transpose.

    Returns:
        A float32 variable drawn from a truncated normal distribution with
        standard deviation WEIGHTS_INIT_STDEV and a fixed seed of 1.
    """
    in_channels = [dim.value for dim in net.get_shape()][3]

    if transpose:
        channel_dims = [out_channels, in_channels]
    else:
        channel_dims = [in_channels, out_channels]
    kernel_shape = [filter_size, filter_size] + channel_dims

    initial = tf.truncated_normal(kernel_shape, stddev=WEIGHTS_INIT_STDEV, seed=1)
    return tf.Variable(initial, dtype=tf.float32)
# net(tf.placeholder(tf.float32, shape=[1, 336, 336, 3]))
- vgg.py:定义特征提取网络
#encoding=utf-8
# Copyright (c) 2015-2016 Anish Athalye. Released under GPLv3.
import tensorflow as tf
import numpy as np
import scipy.io
import pdb
# Per-channel mean that preprocess() subtracts from an image and unprocess()
# adds back (presumably the ImageNet RGB mean used by VGG -- TODO confirm).
MEAN_PIXEL = np.array([ 123.68 , 116.779, 103.939])
def net(data_path, input_image):
    """Build the VGG-19 feature extractor on top of `input_image`.

    Args:
        data_path: path of the .mat file holding the pretrained parameters.
        input_image: input tensor (already preprocessed by the caller).

    Returns:
        Dict mapping every layer name ('conv1_1', 'relu1_1', ..., 'relu5_4')
        to the output tensor of that layer.
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',

        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',

        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',

        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',

        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )

    data = scipy.io.loadmat(data_path)
    # The stored mean is read here but not used below; the caller is expected
    # to subtract MEAN_PIXEL through preprocess().
    mean = data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))
    weights = data['layers'][0]

    outputs = {}
    current = input_image
    for idx, layer_name in enumerate(layers):
        if layer_name.startswith('conv'):
            kernels, bias = weights[idx][0][0][0][0]
            # Swap the first two kernel axes of the stored layout
            # (presumably width-major in the .mat file -- TODO confirm).
            kernels = np.transpose(kernels, [1, 0, 2, 3])
            current = _conv_layer(current, kernels, bias.reshape(-1))
        elif layer_name.startswith('relu'):
            current = tf.nn.relu(current)
        elif layer_name.startswith('pool'):
            current = _pool_layer(current)
        outputs[layer_name] = current

    assert len(outputs) == len(layers)
    return outputs
def _conv_layer(input, weights, bias):
    """Stride-1 'SAME' convolution with constant (non-trainable) weights.

    Args:
        input: input tensor.
        weights: kernel array, wrapped in a tf.constant.
        bias: 1-D bias added to the convolution result.

    Returns:
        The convolution output with the bias added.
    """
    kernel = tf.constant(weights)
    conv = tf.nn.conv2d(input, kernel, strides=(1, 1, 1, 1), padding='SAME')
    return tf.nn.bias_add(conv, bias)
def _pool_layer(input):
    """Apply 2x2 max pooling with stride 2 and 'SAME' padding to `input`."""
    window = (1, 2, 2, 1)
    return tf.nn.max_pool(input, ksize=window, strides=window, padding='SAME')
def preprocess(image):
    """Return `image` with MEAN_PIXEL subtracted (broadcast over the last axis)."""
    centered = image - MEAN_PIXEL
    return centered
def unprocess(image):
    """Return `image` with MEAN_PIXEL added back (inverse of preprocess)."""
    restored = image + MEAN_PIXEL
    return restored
- optimize.py:损失函数的构建
#encoding=utf-8
from __future__ import print_function
import functools
import vgg, pdb, time
import tensorflow as tf, numpy as np, os
import transform
from utils import get_img
# VGG layers whose Gram matrices define the style loss.
STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
# VGG layer whose activations define the content loss.
CONTENT_LAYER = 'relu4_2'
# Device string; not referenced by the code visible in this file section.
DEVICES = '/cpu:0' #'CUDA_VISIBLE_DEVICES'
# Sets MLU_VISIBLE_DEVICES to an empty string at import time
# (presumably to hide MLU accelerators -- TODO confirm).
os.putenv('MLU_VISIBLE_DEVICES','')
def loss_function(net, content_features, style_features, content_weight, style_weight, tv_weight, preds, batch_size):
    """Build the content, style, total-variation and total losses.

    Args:
        net: dict of VGG layer outputs computed on the generated image.
        content_features: dict holding the content image's activations for
            CONTENT_LAYER.
        style_features: dict mapping each name in STYLE_LAYERS to the Gram
            matrix of the style image.
        content_weight: weight of the content (feature reconstruction) loss.
        style_weight: weight of the style reconstruction loss.
        tv_weight: weight of the total-variation regularizer.
        preds: generated image tensor (4-D, batch first).
        batch_size: number of images per batch.

    Returns:
        Tuple ``(content_loss, style_loss, tv_loss, loss)`` where ``loss`` is
        the sum of the three weighted terms.
    """
    # ---- content loss ----
    content_size = _tensor_size(content_features[CONTENT_LAYER]) * batch_size
    assert _tensor_size(content_features[CONTENT_LAYER]) == _tensor_size(net[CONTENT_LAYER])
    content_loss = (1.0 / (4 * content_size)) * tf.reduce_sum(
        tf.pow(net[CONTENT_LAYER] - content_features[CONTENT_LAYER], 2)) * content_weight

    # ---- style loss ----
    style_losses = []
    for style_layer in STYLE_LAYERS:
        layer = net[style_layer]
        bs, height, width, filters = [dim.value for dim in layer.get_shape()]
        size = height * width * filters
        # Flatten the spatial dimensions, then form the per-image Gram matrix.
        feats = tf.reshape(layer, (bs, height * width, filters))
        feats_T = tf.transpose(feats, perm=[0, 2, 1])
        grams = tf.matmul(feats_T, feats) / size
        style_gram = style_features[style_layer]
        # style_weight is applied exactly once, on the summed loss below;
        # multiplying here as well would square its effect.
        style_losses.append(
            (1.0 / (4 * bs ** 2 * size ** 2)) * tf.reduce_sum(tf.pow(style_gram - grams, 2)))
    style_loss = style_weight * functools.reduce(tf.add, style_losses) / batch_size

    # ---- total-variation loss ----
    tv_y_size = _tensor_size(preds[:, 1:, :, :])
    tv_x_size = _tensor_size(preds[:, :, 1:, :])
    # Difference between the image and itself shifted by one pixel along
    # axis 1 (y_tv) and along axis 2 (x_tv). `:-1` replaces the hard-coded
    # 256-pixel bound, so any image size works.
    y_tv = tf.nn.l2_loss(preds[:, 1:, :, :] - preds[:, :-1, :, :])
    x_tv = tf.nn.l2_loss(preds[:, :, 1:, :] - preds[:, :, :-1, :])
    tv_loss = tv_weight * 2 * (x_tv / tv_x_size + y_tv / tv_y_size) / batch_size

    loss = content_loss + style_loss + tv_loss
    return content_loss, style_loss, tv_loss, loss
#np arr, np arr
def optimize(content_targets, style_target, content_weight, style_weight,
             tv_weight, vgg_path, epochs=2, print_iterations=1000,
             batch_size=4, save_path='saver/fns.ckpt', slow=False,
             learning_rate=1e-3, debug=False, type=0, save=True):
    """Train the style-transfer network; a generator reporting progress.

    Args:
        content_targets: sequence of content image paths.
        style_target: style image as a numpy array.
        content_weight: weight of the content loss.
        style_weight: weight of the style loss.
        tv_weight: weight of the total-variation regularizer.
        vgg_path: path of the file holding the VGG-19 parameters.
        epochs: number of passes over `content_targets`.
        print_iterations: report (and checkpoint) every this many iterations.
        batch_size: images per batch (forced to 1 when `slow` is set).
        save_path: checkpoint path prefix; the iteration count is appended.
        slow: optimize the image pixels directly instead of a network.
        learning_rate: learning rate of the Adam optimizer.
        debug: print the time spent on every batch.
        type: normalization selector forwarded to transform.net
            (0 = batch norm, 1 = instance norm).
        save: write a checkpoint whenever progress is reported.

    Yields:
        ``(preds, losses, iterations, epoch)`` where ``losses`` is the tuple
        ``(style_loss, content_loss, tv_loss, total_loss)``.
    """
    if slow:
        batch_size = 1
    # Drop the trailing images that do not fill a whole batch.
    mod = len(content_targets) % batch_size
    if mod > 0:
        print("Train set has been trimmed slightly..")
        content_targets = content_targets[:-mod]

    style_features = {}
    batch_shape = (batch_size, 256, 256, 3)
    style_shape = (1,) + style_target.shape
    print(style_shape)

    with tf.Graph().as_default(), tf.device('/cpu:0'), tf.Session() as sess:
        # ---- precompute the Gram matrices of the style image ----
        style_image = tf.placeholder(tf.float32, shape=style_shape)
        net = vgg.net(vgg_path, vgg.preprocess(style_image))
        style_pre = np.array([style_target])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={style_image: style_pre})
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[layer] = gram

        # ---- content branch ----
        X_content = tf.placeholder(tf.float32, shape=batch_shape)
        X_pre = vgg.preprocess(X_content)
        content_features = {}
        content_net = vgg.net(vgg_path, X_pre)
        content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

        # ---- generated image ----
        if slow:
            preds = tf.Variable(
                tf.random_normal(X_content.get_shape()) * 0.256
            )
            preds_pre = preds
        else:
            # Forward `type` so the requested normalization is actually used.
            preds = transform.net(X_content, type=type)
            preds_pre = vgg.preprocess(preds)

        net = vgg.net(vgg_path, preds_pre)
        content_loss, style_loss, tv_loss, loss = loss_function(
            net, content_features, style_features, content_weight,
            style_weight, tv_weight, preds, batch_size)

        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        sess.run(tf.global_variables_initializer())

        import random
        uid = random.randint(1, 100)
        print("UID: %s" % uid)

        # Created lazily and reused: building a new Saver for every
        # checkpoint would add save/restore ops to the graph each time.
        saver = None

        for epoch in range(epochs):
            num_examples = len(content_targets)
            iterations = 0
            while iterations * batch_size < num_examples:
                start_time = time.time()
                curr = iterations * batch_size
                step = curr + batch_size
                X_batch = np.zeros(batch_shape, dtype=np.float32)
                for j, img_p in enumerate(content_targets[curr:step]):
                    X_batch[j] = get_img(img_p, (256, 256, 3)).astype(np.float32)

                iterations += 1
                assert X_batch.shape[0] == batch_size

                feed_dict = {
                    X_content: X_batch
                }
                train_step.run(feed_dict=feed_dict)

                end_time = time.time()
                delta_time = end_time - start_time
                if debug:
                    print("UID: %s, batch time: %s" % (uid, delta_time))
                print('iteration: %d' % iterations)

                is_print_iter = int(iterations) % print_iterations == 0
                if slow:
                    is_print_iter = epoch % print_iterations == 0
                is_last = epoch == epochs - 1 and iterations * batch_size >= num_examples
                # Always report on the very last iteration; otherwise a run
                # shorter than `print_iterations` would never yield a result
                # or write a checkpoint.
                should_print = is_print_iter or is_last
                if should_print:
                    to_get = [style_loss, content_loss, tv_loss, loss, preds]
                    test_feed_dict = {
                        X_content: X_batch
                    }
                    tup = sess.run(to_get, feed_dict=test_feed_dict)
                    _style_loss, _content_loss, _tv_loss, _loss, _preds = tup
                    losses = (_style_loss, _content_loss, _tv_loss, _loss)
                    if slow:
                        _preds = vgg.unprocess(_preds)
                    elif save:
                        # Checkpoint name = save_path + iteration count.
                        if saver is None:
                            saver = tf.train.Saver()
                        saver.save(sess, save_path + str(iterations))
                    yield (_preds, losses, iterations, epoch)
def _tensor_size(tensor):
    """Return the product of all static dimensions of `tensor` but the first.

    For a 4-D tensor this is the element count of one batch entry.
    """
    size = 1
    for dim in tensor.get_shape()[1:]:
        size = size * dim.value
    return size
- style.py:实时风格迁移的训练
#encoding=utf-8
from __future__ import print_function
import sys, os, pdb
sys.path.insert(0, 'src')
import numpy as np, scipy.misc
from optimize import optimize
from argparse import ArgumentParser
from utils import save_img, get_img, exists, list_files
import evaluate
# Sets MLU_VISIBLE_DEVICES to an empty string at import time
# (presumably to hide MLU accelerators -- TODO confirm).
os.putenv('MLU_VISIBLE_DEVICES','')
# Default values of the command-line options defined in build_parser().
CONTENT_WEIGHT = 7.5e0
STYLE_WEIGHT = 1e2
TV_WEIGHT = 2e2
LEARNING_RATE = 1e-3
NUM_EPOCHS = 2
# Not referenced by the code visible here (--checkpoint-dir is required).
CHECKPOINT_DIR = 'checkpoints'
CHECKPOINT_ITERATIONS = 2000
VGG_PATH = 'data/imagenet-vgg-verydeep-19.mat'
TRAIN_PATH = 'data/train2014_small'
BATCH_SIZE = 4
# DEVICE and FRAC_GPU are not referenced by the code visible here.
DEVICE = '/cpu:0'
FRAC_GPU = 1
def build_parser():
    """Create the command-line parser of the training script.

    Returns:
        An ArgumentParser whose defaults come from the module-level constants.
        Note that ``--save`` is a store_true flag whose default is already
        True, so it is always True.
    """
    parser = ArgumentParser()
    add = parser.add_argument

    # ---- paths ----
    add('--checkpoint-dir', type=str, dest='checkpoint_dir',
        help='dir to save checkpoint in', metavar='CHECKPOINT_DIR',
        required=True)
    add('--style', type=str, dest='style',
        help='style image path', metavar='STYLE', required=True)
    add('--train-path', type=str, dest='train_path',
        help='path to training images folder', metavar='TRAIN_PATH',
        default=TRAIN_PATH)
    add('--test', type=str, dest='test',
        help='test image path', metavar='TEST', default=False)
    add('--test-dir', type=str, dest='test_dir',
        help='test image save dir', metavar='TEST_DIR', default=False)

    # ---- training schedule ----
    add('--slow', dest='slow', action='store_true',
        help="gatys' approach (for debugging, not supported)",
        default=False)
    add('--epochs', type=int, dest='epochs',
        help='num epochs', metavar='EPOCHS', default=NUM_EPOCHS)
    add('--batch-size', type=int, dest='batch_size',
        help='batch size', metavar='BATCH_SIZE', default=BATCH_SIZE)
    add('--checkpoint-iterations', type=int, dest='checkpoint_iterations',
        help='checkpoint frequency', metavar='CHECKPOINT_ITERATIONS',
        default=CHECKPOINT_ITERATIONS)
    add('--vgg-path', type=str, dest='vgg_path',
        help='path to VGG19 network (default %(default)s)',
        metavar='VGG_PATH', default=VGG_PATH)

    # ---- loss weights and optimizer ----
    add('--content-weight', type=float, dest='content_weight',
        help='content weight (default %(default)s)',
        metavar='CONTENT_WEIGHT', default=CONTENT_WEIGHT)
    add('--style-weight', type=float, dest='style_weight',
        help='style weight (default %(default)s)',
        metavar='STYLE_WEIGHT', default=STYLE_WEIGHT)
    add('--tv-weight', type=float, dest='tv_weight',
        help='total variation regularization weight (default %(default)s)',
        metavar='TV_WEIGHT', default=TV_WEIGHT)
    add('--learning-rate', type=float, dest='learning_rate',
        help='learning rate (default %(default)s)',
        metavar='LEARNING_RATE', default=LEARNING_RATE)

    # ---- model variant / checkpointing ----
    add('--type', type=int, dest='type',
        help='type==0, use batch norm; type==1, use instance norm',
        metavar='TYPE', default=0)
    add('--save', dest='save', action='store_true',
        help='whether save ckpt or not', default=True)

    return parser
def check_opts(opts):
    """Validate the parsed command-line options.

    Path options are checked with the `exists` helper; numeric options are
    checked with assertions. The checks run in the same order as listed.

    Args:
        opts: namespace returned by the parser of build_parser().
    """
    required_paths = (
        (opts.checkpoint_dir, "checkpoint dir not found!"),
        (opts.style, "style path not found!"),
        (opts.train_path, "train path not found!"),
    )
    for path, message in required_paths:
        exists(path, message)

    # Giving either test option requires both of them to be valid.
    if opts.test or opts.test_dir:
        exists(opts.test, "test img not found!")
        exists(opts.test_dir, "test directory not found!")

    exists(opts.vgg_path, "vgg network data not found!")

    assert opts.epochs > 0
    assert opts.batch_size > 0
    assert opts.checkpoint_iterations > 0
    assert os.path.exists(opts.vgg_path)
    assert opts.content_weight >= 0
    assert opts.style_weight >= 0
    assert opts.tv_weight >= 0
    assert opts.learning_rate >= 0
def _get_files(img_dir):
    """Return the entries reported by list_files(img_dir), joined to img_dir."""
    paths = []
    for file_name in list_files(img_dir):
        paths.append(os.path.join(img_dir, file_name))
    return paths
def main():
    """Parse the command line, run the training and print the losses.

    When ``--test`` is given, an output image is written to the test
    directory every time optimize() reports progress.
    """
    parser = build_parser()
    options = parser.parse_args()
    check_opts(options)

    # Style image and the list of content image paths.
    style_target = get_img(options.style)
    if not options.slow:
        content_targets = _get_files(options.train_path)
    elif options.test:
        content_targets = [options.test]
    else:
        # Without this branch `content_targets` would be unbound and the
        # script would die later with a NameError.
        parser.error('--slow requires --test')

    # NOTE(review): `slow` is not forwarded to optimize(); it was commented
    # out in the original code (the option is documented as "not supported").
    kwargs = {
        "epochs": options.epochs,
        "print_iterations": options.checkpoint_iterations,
        "batch_size": options.batch_size,
        "save_path": os.path.join(options.checkpoint_dir, 'fns.ckpt'),
        "learning_rate": options.learning_rate,
        "type": options.type,
        "save": options.save,
    }

    if options.slow:
        if options.epochs < 10:
            kwargs['epochs'] = 1000
        if options.learning_rate < 1:
            kwargs['learning_rate'] = 1e1

    args = [
        content_targets,
        style_target,
        options.content_weight,
        options.style_weight,
        options.tv_weight,
        options.vgg_path,
    ]

    for preds, losses, i, epoch in optimize(*args, **kwargs):
        style_loss, content_loss, tv_loss, loss = losses

        print('Epoch %d, Iteration: %d, Loss: %s' % (epoch, i, loss))
        to_print = (style_loss, content_loss, tv_loss)
        print('style: %s, content:%s, tv: %s' % to_print)

        if options.test:
            assert options.test_dir != False
            preds_path = '%s/%s_%s.png' % (options.test_dir, epoch, i)
            if not options.slow:
                # Run the freshly saved checkpoint on the test image.
                evaluate.ffwd_to_img(options.test, preds_path,
                                     options.checkpoint_dir)
            else:
                # The yielded prediction is the image to save; the original
                # code referenced an undefined name `img` here.
                save_img(preds_path, preds)

    print("Training complete.")


if __name__ == '__main__':
    main()
测试输出结果
原图
风格图像
迁移结果