import os
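'''
TensorFlow C++ log level (set here, before tensorflow is imported):
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0' # default, show all messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' # hide INFO messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # hide INFO and WARNING messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # hide INFO, WARNING and ERROR messages
'''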
# Quiet TensorFlow's C++ logging; this runs before the tensorflow import below.
# NOTE(review): per TensorFlow's documented levels, '2' hides INFO and WARNING.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import time #记录每次迭代所耗的时间,运行程序后在控制台可以看到时间,单位为second
import numpy as np
import tensorflow as tf
import load_vgg_sol #导入同一个文件夹下的其他py文件,就可以调用其他文件夹下定义的函数了,记得不要加文件后缀名py,否则会报错 No module named 'load_vgg_sol.py'; 'load_vgg_sol' is not a package
import utils #里面定义了style_transfer_sol.py中需要用到的各种函数
def setup():
    """Create the 'checkpoints' and 'outputs' folders used by training.

    Both paths are relative, so they are resolved against the current
    working directory. Creation is delegated to utils.safe_mkdir.
    """
    for folder in ('checkpoints', 'outputs'):
        utils.safe_mkdir(folder)
class StyleTransfer(object):
    """Style transfer that optimizes an input image against VGG features.

    Typical use is ``StyleTransfer(...)``, then ``build()`` to construct the
    graph, then ``train(n_iters)`` to optimize and write images to
    ``outputs/``.
    """

    # Directory scanned for an existing checkpoint when training starts.
    CHECKPOINT_DIR = 'checkpoints/style_transfer'
    # Path prefix handed to the saver. It must live inside CHECKPOINT_DIR,
    # otherwise a later run can never find what an earlier run saved.
    CHECKPOINT_PREFIX = 'checkpoints/style_transfer/style_transfer'

    def __init__(self, content_img, style_img, img_width, img_height):
        """Load the images and set up hyperparameters.

        Args:
            content_img: passed to utils.get_resized_image as the content
                image (presumably a file path; confirm against utils).
            style_img: passed to utils.get_resized_image as the style image.
            img_width: width every image is resized to.
            img_height: height every image is resized to.

        Feel free to alter any hyperparameter below and see how it affects
        the generated result.
        """
        self.img_width = img_width
        self.img_height = img_height
        self.content_img = utils.get_resized_image(content_img, img_width, img_height)
        self.style_img = utils.get_resized_image(style_img, img_width, img_height)
        # Starting point of the optimization, derived from the content image.
        self.initial_img = utils.generate_noise_image(self.content_img, img_width, img_height)

        # VGG layer whose feature map is used for the content loss.
        self.content_layer = 'conv4_2'
        # VGG layers whose feature maps are used for the style loss.
        self.style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
        # Weights of the two terms in total = content_w * content + style_w * style.
        # With style_w = 0 only the content term is optimized; the original
        # author noted 0.01 / 1 as the alternative setting.
        self.content_w = 1
        self.style_w = 0
        # Per-layer style weights, in the same order as style_layers; deeper
        # layers get larger weights.
        self.style_layer_w = [0.5, 1.0, 1.5, 3.0, 4.0]
        # Step counter incremented by the optimizer; not trainable itself.
        self.gstep = tf.Variable(0, dtype=tf.int32,
                                 trainable=False, name='global_step')
        self.lr = 2.0  # learning rate

    def create_input(self):
        """Create the single variable that feeds VGG.

        One variable serves as the content image, the style image and the
        generated image in turn: all three share the same dimensions and
        need the same features extracted. It is a variable rather than a
        placeholder because the generated image itself is what gets trained.
        Note: image height corresponds to the number of rows, not columns.
        """
        with tf.variable_scope('input'):
            self.input_img = tf.get_variable(
                'in_img',
                shape=([1, self.img_height, self.img_width, 3]),
                dtype=tf.float32,
                initializer=tf.zeros_initializer())

    def load_vgg(self):
        """Build VGG on top of input_img and mean-center the source images.

        The VGG wrapper exposes ``mean_pixels``; per the original notes these
        are the per-channel means [123.68, 116.779, 103.939] that VGG-19
        subtracted during training, so the same shift is applied here.
        """
        self.vgg = load_vgg_sol.VGG(self.input_img)
        self.vgg.load()
        # In-place subtraction: calling load_vgg twice would shift twice.
        self.content_img -= self.vgg.mean_pixels
        self.style_img -= self.vgg.mean_pixels

    def _content_loss(self, P, F):
        """Set self.content_loss from content-layer features.

        Args:
            P: content representation of the content image; must expose
                ``.size`` (losses() passes the array returned by sess.run).
            F: content representation of the generated image.

        The coefficient is 1 / (4 * P.size) rather than the 0.5 used in the
        paper, as requested by the assignment handout.
        """
        squared_error = tf.reduce_sum((F - P) ** 2)
        self.content_loss = squared_error / (4.0 * P.size)

    def _gram_matrix(self, F, N, M):
        """Return the N x N gram matrix of feature map F.

        Args:
            F: feature map holding M * N values.
            N: number of filters.
            M: height times width of the feature map.
        """
        flat = tf.reshape(F, (M, N))
        # (N, M) x (M, N) -> (N, N)
        return tf.matmul(tf.transpose(flat), flat)

    def _single_style_loss(self, a, g):
        """Return the style loss at one layer (E_l in the paper).

        Args:
            a: feature representation of the style image at that layer.
            g: feature representation of the generated image at that layer.

        Both arguments are feature maps, not gram matrices; the gram
        matrices are computed here.
        """
        N = a.shape[3]  # number of filters
        M = a.shape[1] * a.shape[2]  # height times width of the feature map
        style_gram = self._gram_matrix(a, N, M)
        generated_gram = self._gram_matrix(g, N, M)
        return tf.reduce_sum((generated_gram - style_gram) ** 2 / ((2 * N * M) ** 2))

    def _style_loss(self, A):
        """Set self.style_loss to the weighted sum of per-layer style losses.

        Args:
            A: style-image feature maps, one per entry of self.style_layers
                and in the same order (five with the default layers).
        """
        layer_losses = [
            self._single_style_loss(style_feat, getattr(self.vgg, layer_name))
            for style_feat, layer_name in zip(A, self.style_layers)
        ]
        self.style_loss = sum(
            [weight * loss
             for weight, loss in zip(self.style_layer_w, layer_losses)])

    def losses(self):
        """Build content_loss, style_loss and total_loss.

        The target features are obtained by assigning the content / style
        image to input_img and evaluating the relevant VGG layers in a
        short-lived session; the losses then compare those fixed values with
        the symbolic VGG outputs for the image being trained.
        """
        with tf.variable_scope('losses'):
            with tf.Session() as sess:
                # Assign the content image to the input variable.
                sess.run(self.input_img.assign(self.content_img))
                gen_img_content = getattr(self.vgg, self.content_layer)
                content_img_content = sess.run(gen_img_content)
            self._content_loss(content_img_content, gen_img_content)

            with tf.Session() as sess:
                sess.run(self.input_img.assign(self.style_img))
                style_layers = sess.run(
                    [getattr(self.vgg, layer) for layer in self.style_layers])
            self._style_loss(style_layers)

            # Weighted combination of the content and style terms.
            self.total_loss = (self.content_w * self.content_loss
                               + self.style_w * self.style_loss)

    def optimize(self):
        """Create the Adam training op; each run also increments gstep."""
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.opt = optimizer.minimize(self.total_loss, global_step=self.gstep)

    def create_summary(self):
        """Create scalar summaries for the three losses and merge them."""
        with tf.name_scope('summaries'):
            # NOTE(review): these names contain spaces; TensorFlow may
            # sanitize them when building the tags - confirm if tags matter.
            tf.summary.scalar('content loss', self.content_loss)
            tf.summary.scalar('style loss', self.style_loss)
            tf.summary.scalar('total loss', self.total_loss)
            self.summary_op = tf.summary.merge_all()

    def build(self):
        """Build the graph: input, VGG, losses, optimizer, summaries."""
        self.create_input()
        self.load_vgg()
        self.losses()
        self.optimize()
        self.create_summary()

    @staticmethod
    def _report_interval(index):
        """Return how many steps apart progress is reported at step index."""
        if index >= 20:
            return 20
        if index >= 5:
            return 10
        return 1

    def train(self, n_iters):
        """Optimize the image until the global step reaches n_iters.

        Resumes from the latest checkpoint in CHECKPOINT_DIR when one
        exists. At every reporting step the current image is written to
        ``outputs/`` and a summary is logged; a checkpoint is written every
        20 steps.

        Args:
            n_iters: step number at which training stops.
        """
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # The directory name is kept as in the original script.
            writer = tf.summary.FileWriter('graphs/style_stranfer', sess.graph)
            try:
                # Start from the noise image; a restored checkpoint below
                # overrides it.
                sess.run(self.input_img.assign(self.initial_img))

                saver = tf.train.Saver()
                ckpt = tf.train.get_checkpoint_state(self.CHECKPOINT_DIR)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                # Make sure the saver has somewhere to write.
                if not os.path.isdir(self.CHECKPOINT_DIR):
                    os.makedirs(self.CHECKPOINT_DIR)

                initial_step = sess.run(self.gstep)
                start_time = time.time()
                for index in range(initial_step, n_iters):
                    skip_step = self._report_interval(index)
                    sess.run(self.opt)

                    if (index + 1) % skip_step == 0:
                        gen_image, total_loss, summary = sess.run(
                            [self.input_img, self.total_loss, self.summary_op])
                        # Add back the mean pixels subtracted in load_vgg.
                        gen_image = gen_image + self.vgg.mean_pixels
                        writer.add_summary(summary, global_step=index)
                        print('Step {}\n Sum: {:5.1f}'.format(index + 1, np.sum(gen_image)))
                        print(' Loss: {:5.1f}'.format(total_loss))
                        print(' Took: {} seconds'.format(time.time() - start_time))
                        start_time = time.time()

                        filename = 'outputs/%d.png' % (index)
                        utils.save_image(filename, gen_image)

                    # Every reporting interval divides 20, so these steps are
                    # always reporting steps as well.
                    if (index + 1) % 20 == 0:
                        saver.save(sess, self.CHECKPOINT_PREFIX, index)
            finally:
                # Flush pending summaries and release the event file.
                writer.close()
if __name__ == '__main__':
    # Script entry point: prepare folders, build the graph, then train.
    setup()
    content_path, style_path = 'content/transp.jpg', 'styles/guernica.jpg'
    width, height = 333, 250
    model = StyleTransfer(content_path, style_path, width, height)
    model.build()
    model.train(300)  # stop once the global step reaches 300
# Note: spent a few hours today annotating this code so it is easy to look up later; kept here as a record.