# Copyright 2017 Xintong Han. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
""" Test for Stage 1: from product image + body segment +
pose + face/hair predict a coarse result and product segment.
"""
# Stage 1 test: from the product image + body segmentation + pose +
# face/hair, predict a coarse result and the product segmentation.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# "from __future__ import ..." brings features of newer Python releases into
# the current version, i.e. it lets Python 2.x code use Python 3.x syntax and
# behavior.
import collections
import os
import time
import numpy as np
import scipy.io as sio
import scipy.misc
import tensorflow as tf
from utils import *
from model_zalando_mask_content import create_generator
# Command-line flags of the stage-1 test script. FLAGS exposes the parsed
# values. FLAGS.checkpoint, read in main(), is not defined in this block --
# presumably one of the imported modules defines it (TODO confirm).
FLAGS = tf.app.flags.FLAGS

tf.flags.DEFINE_string("pose_dir", "data/pose/",
                       "Directory containing poses.")
tf.flags.DEFINE_string("segment_dir", "data/segment/",
                       "Directory containing human segmentations.")
tf.flags.DEFINE_string("image_dir", "data/women_top/",
                       "Directory containing product and person images.")
tf.flags.DEFINE_string("test_label",
                       "data/viton_test_pairs.txt",
                       "File containing labels for testing.")
tf.flags.DEFINE_string("result_dir", "results/",
                       "Folder containing the results of testing.")
# Half-open range [begin, end) of test_label lines to process. Integer
# defaults are given as ints rather than strings.
tf.flags.DEFINE_integer("begin", 0,
                        "Index of the first line of test_label to process.")
tf.flags.DEFINE_integer("end", 2032,
                        "Index one past the last line of test_label to "
                        "process.")

# Print TensorFlow INFO-level log messages.
tf.logging.set_verbosity(tf.logging.INFO)
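# tf.app.flags.DEFINE_xxx() adds an optional command-line argument, and
# tf.app.flags.FLAGS is used to read the parsed value of each argument.
# Usage: tf.app.flags.DEFINE_xxx(flag_name, default_value, help_text), where
# help_text is the hint shown by -h.
# For details see https://blog.csdn.net/zzc15806/article/details/81133045
# tf.logging.set_verbosity(tf.logging.INFO) makes TensorFlow print its
# INFO-level log messages to the screen.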
# preprocess images for testing
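# Preprocess the images for testing. The function is defined as:
# _process_image(image_name, product_image_name, sess,
#                resize_width=192, resize_height=256)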
def _process_image(image_name, product_image_name, sess,
                   resize_width=192, resize_height=256):
    """Load and preprocess one (person image, product image) test pair.

    Args:
      image_name: File name of the person image inside FLAGS.image_dir. The
        name without its extension is also used to locate the matching
        segmentation and pose MAT-files.
      product_image_name: File name of the product image inside
        FLAGS.image_dir.
      sess: tf.Session used to evaluate the preprocessing ops.
      resize_width: Width the images and segments are resized to.
      resize_height: Height the images and segments are resized to.

    Returns:
      A tuple (image, prod_image, pose_raw, body_segment, prod_segment,
      skin_segment). pose_raw is a float32 numpy array; the other entries
      are the values returned by sess.run for the corresponding tensors.
    """
    # Strip the file extension (e.g. "xx.jpg" -> "xx"); the remaining id
    # names the segmentation and pose files of this person image.
    image_id = os.path.splitext(image_name)[0]

    # scipy.misc.imread(name, flatten=False, mode=None) reads the picture
    # into a numpy array.
    image = scipy.misc.imread(os.path.join(FLAGS.image_dir, image_name))
    prod_image = scipy.misc.imread(
        os.path.join(FLAGS.image_dir, product_image_name))

    # loadmat loads the variables stored in a MAT-file into a dict; only
    # the "segment" entry of the segmentation file is used here.
    segment_raw = sio.loadmat(
        os.path.join(FLAGS.segment_dir, image_id))["segment"]
    segment_raw = process_segment_map(
        segment_raw, image.shape[0], image.shape[1])

    pose_raw = sio.loadmat(os.path.join(FLAGS.pose_dir, image_id))
    pose_raw = extract_pose_keypoints(pose_raw)
    pose_raw = extract_pose_map(pose_raw, image.shape[0], image.shape[1])
    pose_raw = np.asarray(pose_raw, np.float32)

    body_segment, prod_segment, skin_segment = extract_segmentation(
        segment_raw)

    # tf.image.convert_image_dtype(image, dtype, saturate=False, name=None)
    # converts the image to `dtype`, scaling its values as needed. Images
    # represented with floating point values are expected to lie in [0, 1).
    # Converting from float to an integer type may overflow/underflow;
    # saturate=True clips the input before casting to avoid that.
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    prod_image = tf.image.convert_image_dtype(prod_image, dtype=tf.float32)

    # Resize everything to the network input size.
    # BILINEAR interpolates linearly once along each of the two directions.
    # NEAREST_NEIGHBOR copies the value of the closest source pixel, and the
    # returned tensor keeps the dtype of its input.
    # align_corners defaults to False, i.e. the four corners of the input and
    # output images are not aligned exactly.
    # For a visual comparison see https://www.jianshu.com/p/9cccdb418674
    # NOTE(review): these ops are created anew on every call, so the default
    # graph presumably grows with each processed image -- confirm whether
    # that matters for long runs.
    target_size = [resize_height, resize_width]
    image = tf.image.resize_images(
        image,
        size=target_size,
        method=tf.image.ResizeMethod.BILINEAR)
    prod_image = tf.image.resize_images(
        prod_image,
        size=target_size,
        method=tf.image.ResizeMethod.BILINEAR)
    body_segment = tf.image.resize_images(
        body_segment,
        size=target_size,
        method=tf.image.ResizeMethod.BILINEAR,
        align_corners=False)
    skin_segment = tf.image.resize_images(
        skin_segment,
        size=target_size,
        method=tf.image.ResizeMethod.BILINEAR,
        align_corners=False)
    prod_segment = tf.image.resize_images(
        prod_segment,
        size=target_size,
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    # Rescale the images from [0, 1] to [-1, 1].
    image = (image - 0.5) * 2.0
    prod_image = (prod_image - 0.5) * 2.0

    # Use the skin RGB: multiply the skin segment with the rescaled image.
    skin_segment = skin_segment * image

    # Evaluate the tensors built above through the session.
    [image, prod_image, body_segment, prod_segment, skin_segment] = sess.run(
        [image, prod_image, body_segment, prod_segment, skin_segment])

    return (image, prod_image, pose_raw,
            body_segment, prod_segment, skin_segment)
def main(unused_argv):
    """Run stage-1 inference on the test pairs and write the results.

    Restores the generator from FLAGS.checkpoint, processes lines
    [FLAGS.begin, FLAGS.end) of FLAGS.test_label (clamped to the number of
    lines in that file) and, under FLAGS.result_dir, writes the generated
    images and masks to images/, the masks as MAT-files to tps/, and one
    table row per pair to index.html.

    Args:
      unused_argv: Arguments passed in by tf.app.run(); not used.
    """
    # Create the output directories when they do not exist yet. Only a
    # missing directory is handled; any other failure is raised instead of
    # being silently swallowed.
    images_dir = os.path.join(FLAGS.result_dir, "images")
    tps_dir = os.path.join(FLAGS.result_dir, "tps")
    for directory in (FLAGS.result_dir, images_dir, tps_dir):
        if directory and not os.path.isdir(directory):
            os.makedirs(directory)

    # Batch inference; can also be done one image at a time.
    # Background on batch sizes in general: the upper limit is the whole
    # data set (full-batch learning), but deep-learning data sets are usually
    # large, so small batches are preferred -- they need less memory and
    # typically train faster. batch_size = 1 is the extreme case, also called
    # online learning, where each update follows the gradient of a single
    # sample and therefore fluctuates a lot.
    # Reference: https://www.cnblogs.com/gengyi/p/9853664.html
    batch_size = 1

    # Placeholders only reserve a slot in the graph while it is being built;
    # no data enters the model until the graph is run in a session, where the
    # values are supplied through feed_dict.
    prod_image_holder = tf.placeholder(
        tf.float32, shape=[batch_size, 256, 192, 3])
    body_segment_holder = tf.placeholder(
        tf.float32, shape=[batch_size, 256, 192, 1])
    skin_segment_holder = tf.placeholder(
        tf.float32, shape=[batch_size, 256, 192, 3])
    pose_map_holder = tf.placeholder(
        tf.float32, shape=[batch_size, 256, 192, 18])

    # tf.variable_scope is a context manager for the ops that create
    # variables (layers); it pushes a name scope and a variable scope.
    # https://www.w3cschool.cn/tensorflow_python/tensorflow_python-61ue2ocp.html
    with tf.variable_scope("generator"):
        outputs = create_generator(prod_image_holder, body_segment_holder,
                                   skin_segment_holder, pose_map_holder, 4)

    # Zero-filled arrays of the matching sizes that hold one batch of inputs.
    images = np.zeros((batch_size, 256, 192, 3))
    prod_images = np.zeros((batch_size, 256, 192, 3))
    body_segments = np.zeros((batch_size, 256, 192, 1))
    skin_segments = np.zeros((batch_size, 256, 192, 3))
    pose_raws = np.zeros((batch_size, 256, 192, 18))

    # tf.train.Saver adds ops to save and restore all variables; here it is
    # only used to restore trained parameters for testing. (Its max_to_keep
    # argument, default 5, sets how many recent checkpoints are kept when
    # saving.)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        print("loading model from checkpoint")
        # restore(sess, save_path) needs the path of the saved model;
        # tf.train.latest_checkpoint() looks up the most recently saved one.
        # If it finds none, FLAGS.checkpoint itself is used as the path.
        checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint)
        if checkpoint is None:
            checkpoint = FLAGS.checkpoint
        print(checkpoint)
        # The text after the last '-' of the checkpoint path is parsed as the
        # step number and used in the output file names.
        step = int(checkpoint.split('-')[-1])
        saver.restore(sess, checkpoint)

        # Reading input data.
        with open(FLAGS.test_label) as label_file:
            test_info = label_file.read().splitlines()

        # Never index past the end of the label file.
        end = min(FLAGS.end, len(test_info))
        index_path = os.path.join(FLAGS.result_dir, "index.html")

        for i in range(FLAGS.begin, end, batch_size):
            # Loading batch data: each line holds the person image name
            # followed by the product image name.
            image_names = []
            product_image_names = []
            for j in range(i, min(i + batch_size, end)):
                info = test_info[j].split()
                print(info)
                image_name = info[0]
                product_image_name = info[1]
                image_names.append(image_name)
                product_image_names.append(product_image_name)
                # prod_segment is returned as well but is not fed to the
                # generator.
                (image, prod_image, pose_raw,
                 body_segment, prod_segment,
                 skin_segment) = _process_image(image_name,
                                                product_image_name, sess)
                images[j - i] = image
                prod_images[j - i] = prod_image
                body_segments[j - i] = body_segment
                skin_segments[j - i] = skin_segment
                pose_raws[j - i] = pose_raw

            # Inference: feed the batch and fetch the generator output.
            feed_dict = {
                prod_image_holder: prod_images,
                body_segment_holder: body_segments,
                skin_segment_holder: skin_segments,
                pose_map_holder: pose_raws,
            }
            [image_and_mask_output] = sess.run([outputs],
                                               feed_dict=feed_dict)
            # The first output channel is the mask, the rest is the image.
            mask_output = image_and_mask_output[:, :, :, :1]
            image_output = image_and_mask_output[:, :, :, 1:]

            # Write results. Only the entries loaded for this batch are
            # written.
            num_loaded = len(image_names)
            for j in range(num_loaded):
                pair_name = "%08d_%s_%s" % (
                    step, image_names[j], product_image_names[j])
                # Images are mapped back from [-1, 1] to [0, 1] for saving.
                scipy.misc.imsave(
                    os.path.join(images_dir, pair_name + '.png'),
                    (image_output[j] / 2.0 + 0.5))
                scipy.misc.imsave(
                    os.path.join(images_dir, pair_name + '_mask.png'),
                    np.squeeze(mask_output[j]))
                scipy.misc.imsave(
                    os.path.join(images_dir, image_names[j]),
                    (images[j] / 2.0 + 0.5))
                scipy.misc.imsave(
                    os.path.join(images_dir, product_image_names[j]),
                    (prod_images[j] / 2.0 + 0.5))
                sio.savemat(
                    os.path.join(tps_dir, pair_name + "_mask.mat"),
                    {"mask": np.squeeze(mask_output[j])})

            # Write html: append to index.html, emitting the table header
            # only when the file is first created. The file is reopened and
            # closed per batch so finished rows are on disk even if a later
            # batch fails.
            # NOTE(review): the header declares five columns while every row
            # below writes six cells.
            is_new_index = not os.path.exists(index_path)
            with open(index_path, "a") as index:
                if is_new_index:
                    index.write("<html><body><table><tr>")
                    index.write("<th>step</th>")
                    index.write("<th>name</th><th>input</th>"
                                "<th>output</th><th>target</th></tr>")
                for j in range(num_loaded):
                    index.write("<tr>")
                    index.write("<td>%d %d</td>" % (step, i + j))
                    index.write("<td>%s %s</td>" %
                                (image_names[j], product_image_names[j]))
                    index.write("<td><img src='images/%s'></td>" %
                                image_names[j])
                    index.write("<td><img src='images/%s'></td>" %
                                product_image_names[j])
                    index.write("<td><img src='images/%08d_%s'></td>" %
                                (step, image_names[j] + "_" +
                                 product_image_names[j] + '.png'))
                    index.write("<td><img src='images/%08d_%s'></td>" %
                                (step, image_names[j] + "_" +
                                 product_image_names[j] + '_mask.png'))
                    index.write("</tr>")
# Script entry point: tf.app.run() parses the command-line flags and then
# calls main().
if __name__ == "__main__":
    tf.app.run()