这里也有好几个注意点,踩坑花了好长时间。
首先导包、导模型。注意模型的输入是 300×400×3(高×宽×通道)的图像,对应下面 cv2.resize 中的 (400, 300),即(宽, 高)。
import nst_utils
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # silence TensorFlow warnings

# Target size handed to cv2.resize, which takes (width, height).
IMAGE_SIZE = (400, 300)


def _load_image(path):
    """Read an image file and prepare it as network input.

    The file is decoded with OpenCV, converted from BGR to RGB, resized to
    IMAGE_SIZE and passed through nst_utils.reshape_and_normalize_image.

    Args:
        path: location of the image file on disk.

    Returns:
        Whatever nst_utils.reshape_and_normalize_image returns for the
        resized RGB image.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``path``.
    """
    bgr = cv2.imread(path)
    if bgr is None:
        # cv2.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an obscure error in cvtColor.
        raise FileNotFoundError("cannot read image: " + path)
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    return nst_utils.reshape_and_normalize_image(cv2.resize(rgb, IMAGE_SIZE))


nnmodel = nst_utils.load_vgg_model(nst_utils.CONFIG.VGG_MODEL)
# Debugging aid: print(nnmodel) shows what the loaded model exposes.
content_path = "./images/louvre_small.jpg"
style_path = "./images/stone_style.jpg"
src_content = _load_image(content_path)
# The optimisation starts from a noise image derived from the content image.
generate_img = nst_utils.generate_noise_image(src_content)
src_style = _load_image(style_path)
计算内容损失和风格损失。注意:由于后面的风格损失除以了 4 * (nw * nh) ** 2 * nc ** 2,所以内容损失也要除以 4 * n_H * n_W * n_C(否则内容损失的量级会远大于风格损失),不然风格迁移不过去。就是这个原因导致我一直没有效果,查了大半天。
def cal_content_cost(a_content, g):
    """Return the content cost between two activation volumes.

    Args:
        a_content: activations to match (the caller passes the content
            image's activations at the content layer).
        g: tensor holding the generated image's activations at the same
            layer; its static shape must have exactly four dimensions.

    Returns:
        Scalar tensor: the sum of squared element-wise differences, each
        divided by 4 * height * width * channels of ``g``.
    """
    # Drop the leading (batch) dimension; the rest sizes the normaliser.
    height, width, channels = g.get_shape().as_list()[1:]
    normaliser = 4 * height * width * channels
    squared_diff = tf.square(tf.subtract(a_content, g))
    return tf.reduce_sum(tf.divide(squared_diff, normaliser))
def cal_style_cost(style_img, model, sess,
                   layers=('conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1')):
    """Build the style cost tensor, averaged equally over ``layers``.

    For each layer the style image is pushed through the network once and
    its Gram matrix is computed in NumPy. Only the generated image's Gram
    matrix stays symbolic, so the graph holds a small (channels, channels)
    constant per layer instead of the full style activation map.

    Args:
        style_img: style image, fed in place of ``model["input"]``.
        model: mapping from layer name to tensor; must contain "input" and
            every name in ``layers``.
        sess: session used to evaluate the style activations.
        layers: names of the layers that contribute to the style cost; each
            gets weight 1 / len(layers).

    Returns:
        Scalar tensor with the weighted style cost.

    Raises:
        ValueError: if ``layers`` is empty.
    """
    if not layers:
        raise ValueError("layers must contain at least one layer name")

    weight = 1.0 / len(layers)
    j_style = 0
    for name in layers:
        layer = model[name]
        a_style = sess.run(layer, feed_dict={model["input"]: style_img})
        _, nh, nw, nc = a_style.shape

        # Gram matrix of the style activations: flatten the spatial axes so
        # rows are positions and columns are channels, then take A^T A.
        flat_style = a_style.reshape(-1, nc)
        gram_style = np.matmul(flat_style.T, flat_style)

        # Same construction for the generated image, kept in the graph so
        # gradients can flow back to the input.
        flat_g = tf.reshape(layer, [-1, nc])
        gram_g = tf.matmul(tf.transpose(flat_g), flat_g)

        normaliser = 4 * (nw * nh) ** 2 * nc ** 2
        layer_cost = tf.reduce_sum(tf.square(tf.subtract(gram_style, gram_g)))
        j_style += weight * layer_cost / normaliser
    return j_style
这里有张图我觉得特别不错,形象的诠释了矩阵转置乘法的巧妙。
建模,运算
def _to_uint8_rgb(image):
    """Turn a (1, H, W, 3) network-space image into an (H, W, 3) uint8 RGB array."""
    # NOTE(review): assumes nst_utils.reshape_and_normalize_image subtracted
    # nst_utils.CONFIG.MEANS from a 0-255 RGB image - confirm against
    # nst_utils before relying on the colours.
    restored = image + nst_utils.CONFIG.MEANS
    return np.clip(restored[0], 0, 255).astype(np.uint8)


def style_transfer_model(generate_img, style_img, content_img, model, alpha, beta, epoch,
                         learning_rate=2.0, log_every=20, output_dir="./output"):
    """Optimise the model input so it matches the content and style images.

    Args:
        generate_img: initial value assigned to ``model["input"]``.
        style_img: style image passed to cal_style_cost.
        content_img: content image; its "conv4_2" activations are the
            content target.
        model: mapping from layer name to tensor, including "input".
        alpha: weight of the content cost in the total cost.
        beta: weight of the style cost in the total cost.
        epoch: number of optimisation steps.
        learning_rate: Adam learning rate.
        log_every: print costs and save a snapshot every this many steps;
            must be a positive integer.
        output_dir: directory for the snapshots; created if missing.

    Returns:
        The final value of ``model["input"]`` as returned by the session.

    Side effects:
        Prints progress, writes "<step>.jpg" snapshots into ``output_dir``
        and shows two matplotlib figures (cost curves, final image).
    """
    content_layer = model["conv4_2"]
    # cv2.imwrite fails silently when the target directory does not exist.
    os.makedirs(output_dir, exist_ok=True)

    total_cost = []
    style_cost = []
    content_cost = []

    # The context manager releases the session even if a step raises.
    with tf.Session() as sess:
        a_content = sess.run(content_layer, feed_dict={model["input"]: content_img})
        j_content = cal_content_cost(a_content, content_layer)
        j_style = cal_style_cost(style_img, model, sess)
        J = alpha * j_content + beta * j_style

        # Per the original author's note, every parameter in the nst_utils
        # model except the input is a constant, so backpropagation only
        # changes the input image.
        train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(J)

        sess.run(tf.global_variables_initializer())
        sess.run(model["input"].assign(generate_img))

        for i in range(epoch):
            _, t_c, s_c, c_c = sess.run([train_step, J, j_style, j_content])
            total_cost.append(t_c)
            style_cost.append(s_c)
            content_cost.append(c_c)

            # Only pull the image out of the graph when it is actually used.
            if i % log_every == 0:
                generate_img = sess.run(model["input"])
                print("Iteration " + str(i) + " :")
                print("total cost = " + str(t_c))
                print("content cost = " + str(c_c))
                print("style cost = " + str(s_c))
                # cv2.imwrite expects BGR channel order.
                snapshot = cv2.cvtColor(_to_uint8_rgb(generate_img), cv2.COLOR_RGB2BGR)
                cv2.imwrite(os.path.join(output_dir, str(i) + ".jpg"), snapshot)

        generate_img = sess.run(model["input"])

    plt.plot(total_cost, label="total cost")
    plt.plot(style_cost, label="style cost")
    plt.plot(content_cost, label="content cost")
    plt.legend()
    plt.figure()
    plt.imshow(_to_uint8_rgb(generate_img))
    plt.show()
    return generate_img
# Run the transfer: content-cost weight 10, style-cost weight 40, 500 steps.
style_transfer_model(
    generate_img,
    style_img=src_style,
    content_img=src_content,
    model=nnmodel,
    alpha=10,
    beta=40,
    epoch=500,
)
处理前的,我也不知道为何标准化后变成了这样。
画风随迭代变化