VGG19做风格迁移的图像重绘
计算机如何绘画
1、用神经网络判断一张图片的绘画风格
2、将这种风格转移到另一张重绘的图片上
代码实现步骤
1.导入需要的模块以及预定义文件的路径
from __future__ import division
import scipy.io
import numpy as np
import scipy.misc
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
import os
import sys
# Directory that receives the intermediate/final generated images.
OUT_PUT = 'output/'
# Style image: the picture whose painting style is transferred.
STYLE_IMAGE = 'data/StarryNight.jpg'
# Content image: the picture that is repainted in the style above.
CONTENT_IMAGE = 'data/Marilyn_Monroe_in_1952.jpg'
2.设置噪声比、损失权重、迭代次数、图像尺寸、VGG模型路径以及VGG预训练时使用的像素均值(输入图像需要先减去该均值再输入到VGG模型),并显示内容图片查看效果
# Fraction of random noise mixed into the content image to build the
# initial generated image (the remaining 1 - NOISE_RATIO is the content image).
NOISE_RATIO = 0.6
# Weight applied to the content loss in the total loss.
BETA = 5
# Weight applied to the style loss in the total loss.
ALPHA = 100
# Number of optimisation steps; the training loop runs range(ITERATION) and
# saves a snapshot every 100 steps, so 1001 also produces a snapshot at 1000.
ITERATION = 1001
# Size (in pixels) that load_image() resizes every input image to.
IMAGE_WIDTH = 150
IMAGE_HEIGHT = 150
# Number of colour channels.
# NOTE(review): not referenced by the code visible in this file.
COLOR_CHANNELS = 3
# Path to the pretrained VGG-19 weights (.mat file read with scipy.io.loadmat).
VGG_MODEL = 'data/imagenet-vgg-verydeep-19.mat'
# Per-channel mean that is subtracted from images before they are fed to VGG
# and added back before saving. Shaped (1, 1, 1, 3) so it broadcasts over a
# (batch, height, width, channel) array.
# NOTE(review): presumably the RGB mean of the VGG training set - confirm.
MEAN_VALUES = np.array([123.68, 116.779, 103.939]).reshape((1, 1, 1, 3))
# Read the content image at its original size and queue it for display.
content_image = scipy.misc.imread(CONTENT_IMAGE)
imshow(content_image)
3.调整风格图像的大小,需要注意的是风格图像和内容图像必须具有相同的尺寸和颜色通道数
# Resize the style image so that it matches the content image.
style_img = scipy.misc.imread(STYLE_IMAGE)
# Get shape of target and make the style image the same
target_shape = content_image.shape
print("target_shape=", target_shape)
print("style_shape=", style_img.shape)
# Width ratio between content and style image; printed for information only.
ratio = target_shape[1] / style_img.shape[1]
print("resize ratio=", ratio)
# imresize expects a (height, width) tuple, so drop the channel dimension
# instead of relying on the extra entry being ignored.
style_img = scipy.misc.imresize(style_img, target_shape[:2])
# NOTE(review): this overwrites the original style image on disk with the
# resized copy (and re-encodes it on every run). Kept for compatibility;
# consider writing to a separate file instead.
scipy.misc.imsave(STYLE_IMAGE, style_img)
plt.imshow(style_img)
plt.show()
4.根据VGG模型构建网络。论文指出平均池化的效果好于最大池化,因此这里用平均池化代替最大池化;由于本人资源有限,没有进行全部层的实验,有条件的话可以增加层数
# VGG
def load_vgg_model(path, image_height, image_width, color_channels):
    """
    Build the (truncated) VGG-19 feature extractor used for style transfer.

    The pretrained parameters are read from the .mat file at ``path`` and
    frozen as ``tf.constant`` tensors. The only ``tf.Variable`` in the graph
    is ``graph['input']``, i.e. the image itself is what gets optimised.
    Average pooling is used in place of the max pooling of the reference
    network. Only the layers up to ``avgpool2`` are built; the deeper layers
    are left as a commented-out template.

    Layer indices inside the .mat file, as used by this function:
        0 is conv1_1 (3, 3, 3, 64)
        1 is relu
        2 is conv1_2 (3, 3, 64, 64)
        3 is relu
        4 is maxpool
        5 is conv2_1 (3, 3, 64, 128)
        6 is relu
        7 is conv2_2 (3, 3, 128, 128)
        8 is relu
        9 is maxpool
        10 is conv3_1 (3, 3, 128, 256)
        11 is relu
        12 is conv3_2 (3, 3, 256, 256)
        13 is relu
        14 is conv3_3 (3, 3, 256, 256)
        15 is relu
        16 is conv3_4 (3, 3, 256, 256)
        17 is relu
        18 is maxpool
        19 is conv4_1 (3, 3, 256, 512)
        20 is relu
        21 is conv4_2 (3, 3, 512, 512)
        22 is relu
        23 is conv4_3 (3, 3, 512, 512)
        24 is relu
        25 is conv4_4 (3, 3, 512, 512)
        26 is relu
        27 is maxpool
        28 is conv5_1 (3, 3, 512, 512)
        29 is relu
        30 is conv5_2 (3, 3, 512, 512)
        31 is relu
        32 is conv5_3 (3, 3, 512, 512)
        33 is relu
        34 is conv5_4 (3, 3, 512, 512)
        35 is relu
        36 is maxpool
        37 is fullyconnected (7, 7, 512, 4096)
        38 is relu
        39 is fullyconnected (1, 1, 4096, 4096)
        40 is relu
        41 is fullyconnected (1, 1, 4096, 1000)
        42 is softmax

    :param path: path of the pretrained VGG .mat file
    :param image_height: height of the input image variable
    :param image_width: width of the input image variable
    :param color_channels: number of channels of the input image variable
    :return: dict mapping layer names ('input', 'conv1_1', ..., 'avgpool2')
        to the corresponding tensors
    """
    vgg = scipy.io.loadmat(path)
    vgg_layers = vgg['layers']

    def _weight(layer, expected_layer_name):
        """
        :param layer: index of the layer inside the .mat file
        :param expected_layer_name: human-readable layer name; informational
            only, it is not validated against the file
        :return: the weights and bias from the VGG model for a given layer
        """
        # NOTE(review): the stored layer name (vgg_layers[0][layer][0][0][-2])
        # is not checked against expected_layer_name; the name assertion was
        # already disabled, so the unused lookup has been dropped.
        w = vgg_layers[0][layer][0][0][0][0][0]
        b = vgg_layers[0][layer][0][0][0][0][1]
        return w, b

    def _relu(conv2d_layer):
        """
        :param conv2d_layer: output tensor of a conv2d layer
        :return: the relu function wrapped over a tensorflow layer.
        """
        return tf.nn.relu(conv2d_layer)

    def _conv2d(prev_layer, layer, layer_name):
        """
        :param prev_layer: input tensor
        :param layer: index of the layer inside the .mat file
        :param layer_name: human-readable layer name
        :return: the conv2d layer using the weights, bias from the VGG
            model at 'layer'
        """
        w, b = _weight(layer, layer_name)
        w = tf.constant(w)
        # Flatten the bias so that it broadcasts over the channel axis.
        b = tf.constant(np.reshape(b, (b.size)))
        # The kernel is passed positionally: the keyword is spelled `filter`
        # in older TensorFlow releases and `filters` in newer ones.
        return tf.nn.conv2d(prev_layer, w, strides=[1, 1, 1, 1], padding='SAME') + b

    def _conv2d_relu(prev_layer, layer, layer_name):
        """
        :param prev_layer: input tensor
        :param layer: index of the layer inside the .mat file
        :param layer_name: human-readable layer name
        :return: the conv2d and relu layer using the weights, biases from the
            VGG model at 'layer'
        """
        return _relu(_conv2d(prev_layer, layer, layer_name))

    def _avgpool(prev_layer):
        """
        :param prev_layer: input tensor
        :return: the 2x2, stride-2 average pooling layer
        """
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Constructs the graph model
    graph = {}
    graph['input'] = tf.Variable(np.zeros((1, image_height, image_width, color_channels)), dtype='float32')
    graph['conv1_1'] = _conv2d_relu(graph['input'], 0, 'conv1_1')
    graph['conv1_2'] = _conv2d_relu(graph['conv1_1'], 2, 'conv1_2')
    graph['avgpool1'] = _avgpool(graph['conv1_2'])
    graph['conv2_1'] = _conv2d_relu(graph['avgpool1'], 5, 'conv2_1')
    graph['conv2_2'] = _conv2d_relu(graph['conv2_1'], 7, 'conv2_2')
    graph['avgpool2'] = _avgpool(graph['conv2_2'])
    # Deeper layers are disabled to keep the model small. Uncomment them
    # (and extend STYLE_LAYERS / the content layer) when resources allow.
    # graph['conv3_1'] = _conv2d_relu(graph['avgpool2'], 10, 'conv3_1')
    # graph['conv3_2'] = _conv2d_relu(graph['conv3_1'], 12, 'conv3_2')
    # graph['conv3_3'] = _conv2d_relu(graph['conv3_2'], 14, 'conv3_3')
    # graph['conv3_4'] = _conv2d_relu(graph['conv3_3'], 16, 'conv3_4')
    # graph['avgpool3'] = _avgpool(graph['conv3_4'])
    # graph['conv4_1'] = _conv2d_relu(graph['avgpool3'], 19, 'conv4_1')
    # graph['conv4_2'] = _conv2d_relu(graph['conv4_1'], 21, 'conv4_2')
    # graph['conv4_3'] = _conv2d_relu(graph['conv4_2'], 23, 'conv4_3')
    # graph['conv4_4'] = _conv2d_relu(graph['conv4_3'], 25, 'conv4_4')
    # graph['avgpool4'] = _avgpool(graph['conv4_4'])
    # graph['conv5_1'] = _conv2d_relu(graph['avgpool4'], 28, 'conv5_1')
    # graph['conv5_2'] = _conv2d_relu(graph['conv5_1'], 30, 'conv5_2')
    # graph['conv5_3'] = _conv2d_relu(graph['conv5_2'], 32, 'conv5_3')
    # graph['conv5_4'] = _conv2d_relu(graph['conv5_3'], 34, 'conv5_4')
    # graph['avgpool5'] = _avgpool(graph['conv5_4'])
    return graph
5.论文中定义的内容图像损失函数
# loss
def content_loss_func(sess, model):
    """
    Content loss as defined in the paper, measured on layer 'conv2_2'.

    The target activations are obtained by evaluating the layer once at call
    time, so the caller is expected to have assigned the content image to
    ``model['input']`` beforehand.

    :param sess: tensorflow session used to evaluate the target activations
    :param model: dict of layer tensors returned by load_vgg_model
    :return: scalar tensor holding the content loss of the current input
    """
    layer = model['conv2_2']
    # Fixed activations of the content image (a numpy array).
    target = sess.run(layer)
    # Number of filters of the layer.
    num_filters = target.shape[3]
    # Height times width of the feature map.
    map_size = target.shape[1] * target.shape[2]
    scale = 1 / (4 * num_filters * map_size)
    squared_error = tf.reduce_sum(tf.pow(layer - target, 2))
    return scale * squared_error
6.重新定义要使用的层及其权重:如果想得到更平滑的特征,可以增加高层的权重、减少低层的权重;如果想提取更尖锐的特征,反之即可。
# Layers used for the style loss, as (layer name, weight) pairs.
# Increase the weights of the higher layers and decay the weights of the
# lower layers for smoother features; do the opposite for sharper features.
# Every layer named here must exist in the dict returned by load_vgg_model;
# the commented-out entries need the deeper layers to be enabled there.
STYLE_LAYERS = [
('conv1_1', 0.5),
('conv2_1', 1.0),
# ('conv3_1', 1.5),
# ('conv4_1', 3.0),
# ('conv5_1', 4.0),
]
7.论文中定义的风格图像损失函数
def style_loss_func(sess, model):
    """
    Style loss as defined in the paper: the weighted sum, over every layer
    listed in STYLE_LAYERS, of the squared difference between the Gram
    matrices of the style image and of the generated image.

    The target activations are obtained by evaluating each layer once at call
    time, so the caller is expected to have assigned the style image to
    ``model['input']`` beforehand.

    :param sess: tensorflow session used to evaluate the target activations
    :param model: dict of layer tensors returned by load_vgg_model
    :return: scalar tensor holding the style loss of the current input
    """
    def _gram_matrix(F, N, M):
        """
        The gram matrix G.

        :param F: feature map of shape (1, height, width, N)
        :param N: number of filters
        :param M: height times width of the feature map
        :return: (N, N) matrix of filter-to-filter correlations
        """
        # Rows are spatial positions, columns are filters. This order matches
        # the channel-last memory layout; reshaping to (N, M) would scramble
        # the features and yield a huge (M, M) matrix instead.
        Ft = tf.reshape(F, (M, N))
        return tf.matmul(tf.transpose(Ft), Ft)

    def _style_loss(a, x):
        """
        The style loss calculation for a single layer.

        :param a: activations of the style image (numpy array)
        :param x: activations of the generated image (tensor)
        :return: scalar tensor with the layer's style loss
        """
        # N is the number of filters of the layer.
        N = a.shape[3]
        # M is the height times the width of the feature map.
        M = a.shape[1] * a.shape[2]
        # A is the style representation of the style image.
        A = _gram_matrix(a, N, M)
        # G is the style representation of the generated image.
        G = _gram_matrix(x, N, M)
        return (1 / (4 * N**2 * M**2)) * tf.reduce_sum(tf.pow(G - A, 2))

    # Pair every layer's loss with its own weight (previously the second
    # layer's term was summed repeatedly and the other layers were ignored).
    return sum(
        weight * _style_loss(sess.run(model[layer_name]), model[layer_name])
        for layer_name, weight in STYLE_LAYERS
    )
8.定义噪声图像生成方法,按噪声比例将噪声与内容图像混合,同时定义图像预处理、保存和加载的方法
# generate noise
def generate_noise_image(content_image, noise_ratio=NOISE_RATIO):
    """
    Build the starting image for the optimisation.

    :param content_image: batched content image; its first element provides
        the (height, width, channels) of the noise
    :param noise_ratio: weight of the noise; the content image gets
        1 - noise_ratio
    :return: a noise image intermixed with the content image at the given ratio
    """
    sample = content_image[0]
    noise_shape = (1, sample.shape[0], sample.shape[1], sample.shape[2])
    # Uniform white noise in [-20, 20).
    noise = np.random.uniform(-20, 20, noise_shape).astype('float32')
    # Weighted average of the noise and the content representation.
    blended = noise * noise_ratio + content_image * (1 - noise_ratio)
    return blended
def process_image(image):
    """
    Prepare an already-loaded image array for the VGG model.

    :param image: image array
    :return: the image with a leading batch dimension of size 1 and
        MEAN_VALUES subtracted
    """
    # Add the batch dimension; the pixel data itself is unchanged.
    batched = np.expand_dims(image, axis=0)
    # The VGG model expects the mean to be subtracted from its input.
    return batched - MEAN_VALUES
def save_image(path, image):
    """
    Write a generated (mean-subtracted, batched) image to disk.

    :param path: destination file path
    :param image: batched image array; only the first element is saved
    """
    # Add the mean back, then drop the leading batch dimension.
    restored = (image + MEAN_VALUES)[0]
    # Clamp to the valid pixel range before converting to 8-bit.
    pixels = np.clip(restored, 0, 255).astype('uint8')
    # NOTE(review): scipy.misc.imsave is not available in recent SciPy
    # releases - confirm the pinned SciPy version.
    scipy.misc.imsave(path, pixels)
def load_image(path):
    """
    Load an image from disk in the form expected by the VGG model.

    :param path: path of the image file
    :return: the image resized to (IMAGE_HEIGHT, IMAGE_WIDTH), with a leading
        batch dimension of size 1 and MEAN_VALUES subtracted
    """
    # NOTE(review): scipy.misc.imread / imresize are not available in recent
    # SciPy releases - confirm the pinned SciPy version.
    raw = scipy.misc.imread(path)
    resized = scipy.misc.imresize(raw, (IMAGE_HEIGHT, IMAGE_WIDTH))
    batch = np.expand_dims(resized, axis=0)
    # Input to the VGG net expects the mean to be subtracted.
    return batch - MEAN_VALUES
9.开始会话
# start session
sess = tf.InteractiveSession()

# Load the content image (resized, batched and mean-subtracted).
content_image = load_image(CONTENT_IMAGE)
# NOTE(review): the array shown here is mean-subtracted, so the preview
# colours differ from the original picture.
imshow(content_image[0])
plt.show()

# Load the style image and build the model with a matching input size.
style_img = load_image(STYLE_IMAGE)
print(style_img[0].shape[2])
model = load_vgg_model(VGG_MODEL, style_img[0].shape[0], style_img[0].shape[1], style_img[0].shape[2])
print(model)

# Initial generated image: content image mixed with noise.
input_image = generate_noise_image(content_image)
imshow(input_image[0])
plt.show()

sess.run(tf.global_variables_initializer())
# Construct content_loss while the content image is the model input, so the
# target activations captured inside the loss are those of the content image.
sess.run(model['input'].assign(content_image))
content_loss = content_loss_func(sess, model)
# Construct style_loss while the style image is the model input.
sess.run(model['input'].assign(style_img))
style_loss = style_loss_func(sess, model)
# Weighted total loss.
total_loss = BETA * content_loss + ALPHA * style_loss

# Minimise the loss with respect to the input image (the only variable).
optimizer = tf.train.AdamOptimizer(2.0)
train_step = optimizer.minimize(total_loss)

# Initialise again so the variables created by the optimizer are covered,
# then start from the noisy image.
sess.run(tf.global_variables_initializer())
sess.run(model['input'].assign(input_image))

# Make sure the output directory exists before the first snapshot.
if not os.path.exists(OUT_PUT):
    os.mkdir(OUT_PUT)

print("start iteration")
for it in range(ITERATION):
    sess.run(train_step)
    print(it, "")
    if it % 100 == 0:
        # Report progress and save a snapshot of the generated image.
        mixed_image = sess.run(model["input"])
        print("inter %d" % it)
        # mixed_image is a numpy array: summing it with numpy avoids adding
        # a new op to the graph at every checkpoint.
        print("sum:", np.sum(mixed_image))
        print("cost:", sess.run(total_loss))
        filename = os.path.join(OUT_PUT, '%d.png' % it)
        save_image(filename, mixed_image)
源码链接点击这里