时间:20140723-20180729
背景:学习于https://github.com/LijieFan/tvnet的源码
一、demo
import os import cv2 import numpy as np import tensorflow as tf import scipy.io as sio from tvnet import TVNet flags = tf.app.flags #tf定义了tf.app.flags,用于支持接受命令行传递参数,相当于接受argv(参数列表)。 flags.DEFINE_integer("scale", 5, " TVNet scale [3]")#定义命令行参数,第一个是参数名称,第二个参数是默认值,第三个是参数描述(添加命令行参数) flags.DEFINE_integer("warp", 5, " TVNet warp [1]") flags.DEFINE_integer("iteration", 50, " TVNet iteration [10]") flags.DEFINE_string("gpu", '0', " gpu to use [0]") FLAGS = flags.FLAGS #取出命令行参数 scale = FLAGS.scale warp = FLAGS.warp iteration = FLAGS.iteration if int(int(FLAGS.gpu) > -1):#是否使用gpu os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu print ('TVNet Params:\n scale: %d\n warp: %d\n iteration: %d\nUsing gpu: %s'% (scale, warp, iteration, FLAGS.gpu)) # 加载图片 img1 = cv2.imread('frame/img1.png') img2 = cv2.imread('frame/img2.png') h, w, c = img1.shape # model construct 建模 x1 = tf.placeholder(shape=[1, h, w, 3], dtype=tf.float32)#(占位符)初始化分配变量空间 dtype数据类型 shape=[1, h, w, 3] 1*h*w*3维度 x2 = tf.placeholder(shape=[1, h, w, 3], dtype=tf.float32) tvnet = TVNet() u1, u2, rho = tvnet.tvnet_flow(x1,x2,max_scales=scale, warps=warp, max_iterations=iteration) print(11111) # init 建立一个在gpu的会话 sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True), allow_soft_placement=True)) sess.run(tf.global_variables_initializer()) # 全局变量初始化 print(222222) # run model u1_np, u2_np = sess.run([u1, u2], feed_dict={x1: img1[np.newaxis, ...], x2: img2[np.newaxis, ...]}) print(33333333) u1_np = np.squeeze(u1_np) u2_np = np.squeeze(u2_np) flow_mat = np.zeros([h, w, 2]) flow_mat[:, :, 0] = u1_np flow_mat[:, :, 1] = u2_np print(4444444) if not os.path.exists('result'): os.mkdir('result') # 新建目录 res_path = os.path.join('result', 'result.mat') # result/result.mat sio.savemat(res_path, {'flow': flow_mat}) # 把flow_mat模型以字典flow的形式存到上面的路径
二、 tvnet 还没有完全解释完 因为中间一些公式我不会所以卡住了 打算在下周继续深究
import numpy as np import tensorflow as tf import spatial_transformer class TVNet(object): GRAD_IS_ZERO = 1e-12 def __init__(self): pass def grey_scale_image(self, x): assert len(x.shape) == 4 assert x.shape[-1].value == 3, 'number of channels must be 3 (i.e. RGB)' ker_init = tf.constant_initializer([[0.114], [0.587], [0.299]])#核初始化,定义一个常量初始权值为[[0.114], [0.587], [0.299]] grey_x = tf.layers.conv2d(x, 1, [1, 1], padding='same', kernel_initializer=ker_init, use_bias=False, trainable=False) return tf.floor(grey_x) def normalize_images(self, x1, x2):#归一化 reduction_axes = [i for i in range(1, len(x1.shape))]#产生从1-1*3*w*h的索引列表 min_x1 = tf.reduce_min(x1, axis=reduction_axes)#求出下的最小值,最大值 max_x1 = tf.reduce_max(x1, axis=reduction_axes) min_x2 = tf.reduce_min(x2, axis=reduction_axes)#reducemin 矩阵求最小值 max_x2 = tf.reduce_max(x2, axis=reduction_axes) min_val = tf.minimum(min_x1, min_x2) max_val = tf.maximum(max_x1, max_x2) den = max_val - min_val expand_dims = [-1 if i == 0 else 1 for i in range(len(x1.shape))]#产生一个list [-1,1,1,1...] min_val_ex = tf.reshape(min_val, expand_dims)#用minval填充expanddims den_ex = tf.reshape(den, expand_dims) x1_norm = tf.where(den > 0, 255. * (x1 - min_val_ex) / den_ex, x1)#一种常见的图像归一化原理1是y=(x-MinValue)/(MaxValue-MinValue) x2_norm = tf.where(den > 0, 255. * (x2 - min_val_ex) / den_ex, x2)#如果den>0,将255. * (x2 - min_val_ex) / den_ex给x2_norm #x只负责对应替换True的元素,y只负责对应替换False的元素,x,y各有分工 # 由于是替换,返回值的维度,和condition,x , y都是相等的。 return x1_norm, x2_norm def gaussian_smooth(self, x): assert len(x.shape) == 4 ker_init = tf.constant_initializer([[0.000874, 0.006976, 0.01386, 0.006976, 0.000874], [0.006976, 0.0557, 0.110656, 0.0557, 0.006976], [0.01386, 0.110656, 0.219833, 0.110656, 0.01386], [0.006976, 0.0557, 0.110656, 0.0557, 0.006976], [0.000874, 0.006976, 0.01386, 0.006976, 0.000874]]) smooth_x = tf.layers.conv2d(x, x.shape[-1].value, [5, 5], padding='same', kernel_initializer=ker_init, use_bias=False, trainable=False) return smooth_x def warp_image(self, x, u, v): assert len(x.shape) == 4 assert len(u.shape) == 3 assert len(v.shape) == 3 u = u / x.shape[2].value * 2 v = v / x.shape[1].value * 2 delta = tf.concat(axis=1, values=[u, v]) return spatial_transformer.transformer(x, delta, (x.shape[-3].value, x.shape[-2].value)) def centered_gradient(self, x, name): assert len(x.shape) == 4 with tf.variable_scope('centered_gradient'): x_ker_init = tf.constant_initializer([[-0.5, 0, 0.5]]) diff_x = tf.layers.conv2d(x, x.shape[-1].value, [1, 3], padding='same', kernel_initializer=x_ker_init, use_bias=False, name=name + '_diff_x', trainable=False) y_ker_init = tf.constant_initializer([[-0.5], [0], [0.5]]) diff_y = tf.layers.conv2d(x, x.shape[-1].value, [3, 1], padding='same', kernel_initializer=y_ker_init, use_bias=False, name=name + '_diff_y', trainable=False) # refine the boundary first_col = 0.5 * (tf.slice(x, [0, 0, 1, 0], [-1, x.shape[1].value, 1, x.shape[3].value]) - tf.slice(x, [0, 0, 0, 0], [-1, x.shape[1].value, 1, x.shape[3].value])) last_col = 0.5 * ( tf.slice(x, [0, 0, x.shape[2].value - 1, 0], [-1, x.shape[1].value, 1, x.shape[3].value]) - tf.slice(x, [0, 0, x.shape[2].value - 2, 0], [-1, x.shape[1].value, 1, x.shape[3].value])) diff_x_valid = tf.slice(diff_x, begin=[0, 0, 1, 0], size=[-1, x.shape[1].value, x.shape[2].value - 2, x.shape[3].value]) diff_x = tf.concat(axis=2, values=[first_col, diff_x_valid, last_col]) first_row = 0.5 * (tf.slice(x, [0, 1, 0, 0], [-1, 1, x.shape[2].value, x.shape[3].value]) - tf.slice(x, [0, 0, 0, 0], [-1, 1, x.shape[2].value, x.shape[3].value])) last_row = 0.5 * ( tf.slice(x, [0, x.shape[1].value - 1, 0, 0], [-1, 1, x.shape[2].value, x.shape[3].value]) - tf.slice(x, [0, x.shape[1].value - 2, 0, 0], [-1, 1, x.shape[2].value, x.shape[3].value])) diff_y_valid = tf.slice(diff_y, begin=[0, 1, 0, 0], size=[-1, x.shape[1].value - 2, x.shape[2].value, x.shape[3].value]) diff_y = tf.concat(axis=1, values=[first_row, diff_y_valid, last_row]) return diff_x, diff_y def forward_gradient(self, x, name): assert len(x.shape) == 4 with tf.variable_scope('forward_gradient'): x_ker_init = tf.constant_initializer([[-1, 1]]) diff_x = tf.layers.conv2d(x, x.shape[-1].value, [1, 2], padding='same', kernel_initializer=x_ker_init, use_bias=False, name=name + '_diff_x', trainable=True) y_ker_init = tf.constant_initializer([[-1], [1]]) diff_y = tf.layers.conv2d(x, x.shape[-1].value, [2, 1], padding='same', kernel_initializer=y_ker_init, use_bias=False, name=name + '_diff_y', trainable=True) # refine the boundary diff_x_valid = tf.slice(diff_x, begin=[0, 0, 0, 0], size=[-1, x.shape[1].value, x.shape[2].value - 1, x.shape[3].value]) last_col = tf.zeros([tf.shape(x)[0], x.shape[1].value, 1, x.shape[3].value], dtype=tf.float32) diff_x = tf.concat(axis=2, values=[diff_x_valid, last_col]) diff_y_valid = tf.slice(diff_y, begin=[0, 0, 0, 0], size=[-1, x.shape[1].value - 1, x.shape[2].value, x.shape[3].value]) last_row = tf.zeros([tf.shape(x)[0], 1, x.shape[2].value, x.shape[3].value], dtype=tf.float32) diff_y = tf.concat(axis=1, values=[diff_y_valid, last_row]) return diff_x, diff_y def divergence(self, x, y, name): assert len(x.shape) == 4 with tf.variable_scope('divergence'): x_valid = tf.slice(x, begin=[0, 0, 0, 0], size=[-1, x.shape[1].value, x.shape[2].value - 1, x.shape[3].value]) first_col = tf.zeros([tf.shape(x)[0], x.shape[1].value, 1, x.shape[3].value], dtype=tf.float32) x_pad = tf.concat(axis=2, values=[first_col, x_valid]) y_valid = tf.slice(y, begin=[0, 0, 0, 0], size=[-1, y.shape[1].value - 1, y.shape[2].value, y.shape[3].value]) first_row = tf.zeros([tf.shape(y)[0], 1, y.shape[2].value, y.shape[3].value], dtype=tf.float32) y_pad = tf.concat(axis=1, values=[first_row, y_valid]) x_ker_init = tf.constant_initializer([[-1, 1]]) diff_x = tf.layers.conv2d(x_pad, x.shape[-1].value, [1, 2], padding='same', kernel_initializer=x_ker_init, use_bias=False, name=name + '_diff_x', trainable=True) y_ker_init = tf.constant_initializer([[-1], [1]]) diff_y = tf.layers.conv2d(y_pad, y.shape[-1].value, [2, 1], padding='same', kernel_initializer=y_ker_init, use_bias=False, name=name + '_diff_y', trainable=True) div = diff_x + diff_y return div def zoom_size(self, height, width, factor): new_height = int(float(height) * factor + 0.5) new_width = int(float(width) * factor + 0.5) return new_height, new_width def zoom_image(self, x, new_height, new_width): assert len(x.shape) == 4 delta = tf.zeros((tf.shape(x)[0], 2, new_height * new_width)) zoomed_x = spatial_transformer.transformer(x, delta, (new_height, new_width)) # 空间变换网络 x*delta is w*h return tf.reshape(zoomed_x, [tf.shape(x)[0], new_height, new_width, x.shape[-1].value]) def dual_tvl1_optic_flow(self, x1, x2, u1, u2, tau=0.25, # time step lbda=0.15, # weight parameter for the data term theta=0.3, # weight parameter for (u - v)^2 warps=5, # number of warpings per scale max_iterations=5 # maximum number of iterations for optimization ): # u1, u2, rho = self.dual_tvl1_optic_flow(down_x1, down_x2, u1, u2, # tau=tau, lbda=lbda, theta=theta, warps=warps, # max_iterations=max_iterations) l_t = lbda * theta taut = tau / theta diff2_x, diff2_y = self.centered_gradient(x2, 'x2') p11 = p12 = p21 = p22 = tf.zeros_like(x1) for warpings in range(warps): with tf.variable_scope('warping%d' % (warpings,)): u1_flat = tf.reshape(u1, (tf.shape(x2)[0], 1, x2.shape[1].value * x2.shape[2].value)) u2_flat = tf.reshape(u2, (tf.shape(x2)[0], 1, x2.shape[1].value * x2.shape[2].value)) x2_warp = self.warp_image(x2, u1_flat, u2_flat) x2_warp = tf.reshape(x2_warp, tf.shape(x2)) diff2_x_warp = self.warp_image(diff2_x, u1_flat, u2_flat) diff2_x_warp = tf.reshape(diff2_x_warp, tf.shape(diff2_x)) diff2_y_warp = self.warp_image(diff2_y, u1_flat, u2_flat) diff2_y_warp = tf.reshape(diff2_y_warp, tf.shape(diff2_y)) diff2_x_sq = tf.square(diff2_x_warp) diff2_y_sq = tf.square(diff2_y_warp) grad = diff2_x_sq + diff2_y_sq + self.GRAD_IS_ZERO rho_c = x2_warp - diff2_x_warp * u1 - diff2_y_warp * u2 - x1 for ii in range(max_iterations): with tf.variable_scope('iter%d' % (ii,)): rho = rho_c + diff2_x_warp * u1 + diff2_y_warp * u2 + self.GRAD_IS_ZERO; masks1 = rho < -l_t * grad d1_1 = tf.where(masks1, l_t * diff2_x_warp, tf.zeros_like(diff2_x_warp)) d2_1 = tf.where(masks1, l_t * diff2_y_warp, tf.zeros_like(diff2_y_warp)) masks2 = rho > l_t * grad d1_2 = tf.where(masks2, -l_t * diff2_x_warp, tf.zeros_like(diff2_x_warp)) d2_2 = tf.where(masks2, -l_t * diff2_y_warp, tf.zeros_like(diff2_y_warp)) masks3 = (~masks1) & (~masks2) & (grad > self.GRAD_IS_ZERO) d1_3 = tf.where(masks3, -rho / grad * diff2_x_warp, tf.zeros_like(diff2_x_warp)) d2_3 = tf.where(masks3, -rho / grad * diff2_y_warp, tf.zeros_like(diff2_y_warp)) v1 = d1_1 + d1_2 + d1_3 + u1 v2 = d2_1 + d2_2 + d2_3 + u2 u1 = v1 + theta * self.divergence(p11, p12, 'div_p1') u2 = v2 + theta * self.divergence(p21, p22, 'div_p2') u1x, u1y = self.forward_gradient(u1, 'u1') u2x, u2y = self.forward_gradient(u2, 'u2') p11 = (p11 + taut * u1x) / ( 1.0 + taut * tf.sqrt(tf.square(u1x) + tf.square(u1y) + self.GRAD_IS_ZERO)); p12 = (p12 + taut * u1y) / ( 1.0 + taut * tf.sqrt(tf.square(u1x) + tf.square(u1y) + self.GRAD_IS_ZERO)); p21 = (p21 + taut * u2x) / ( 1.0 + taut * tf.sqrt(tf.square(u2x) + tf.square(u2y) + self.GRAD_IS_ZERO)); p22 = (p22 + taut * u2y) / ( 1.0 + taut * tf.sqrt(tf.square(u2x) + tf.square(u2y) + self.GRAD_IS_ZERO)); return u1, u2, rho def tvnet_flow(self, x1, x2, tau=0.25, # 时间步长 lbda=0.15, # 每一个数据项的参数权重 theta=0.3, # weight parameter for (u - v)^2 warps=5, # number of warpings per scale zfactor=0.5, # factor for building the image piramid 图像piramid的构建因素 max_scales=5, # maximum number of scales for image piramid 图像piramid的最放缩倍数 max_iterations=5 # maximum number of iterations for optimization 为优化而第二代的最大数量 ): for i in range(len(x1.shape)):#找到第一个不一致的点 assert x1.shape[i].value == x2.shape[i].value#assert是断言的意思,解释为:我断定这个程序执行之后或者之前会有这样的结果,如果不是,那就扔出一个错误。 zfactor = np.float32(zfactor)#定义一个float的变量 height = x1.shape[-3].value#取图片的高和宽 width = x1.shape[-2].value n_scales = 1 + np.log(np.sqrt(height ** 2 + width ** 2) / 4.0) / np.log(1 / zfactor);####? n_scales = min(n_scales, max_scales) # n_scales = 1 with tf.variable_scope('tvl1_flow'): # 用variable_scope定义一个名为'tvl1_flow'变量父空间 grey_x1 = self.grey_scale_image(x1) # 卷积灰化 grey_x2 = self.grey_scale_image(x2) norm_imgs = self.normalize_images(grey_x1, grey_x2) # 将两个图像归一化返回一个数组形式的归一 smooth_x1 = self.gaussian_smooth(norm_imgs[0]) # 卷积平滑处理 smooth_x2 = self.gaussian_smooth(norm_imgs[1]) for ss in range(n_scales - 1, -1, -1):#在xrange以1的步长倒着 倒金字塔 print("ss:",ss) with tf.variable_scope('scale%d' % ss): down_sample_factor = zfactor ** ss # 下降样例因子 down_height, down_width = self.zoom_size(height, width, down_sample_factor)#放缩后的宽高 if ss == n_scales - 1:#如果ss是缩放比 u1 = tf.get_variable('u1', shape=[1, down_height, down_width, 1], dtype=tf.float32, initializer=tf.zeros_initializer) u2 = tf.get_variable('u2', shape=[1, down_height, down_width, 1], dtype=tf.float32, initializer=tf.zeros_initializer) u1 = tf.tile(u1, [tf.shape(smooth_x1)[0], 1, 1, 1])#tile(A,n)把A复制n次 1 u2 = tf.tile(u2, [tf.shape(smooth_x1)[0], 1, 1, 1]) down_x1 = self.zoom_image(smooth_x1, down_height, down_width) # 变焦 down_x2 = self.zoom_image(smooth_x2, down_height, down_width) u1, u2, rho = self.dual_tvl1_optic_flow(down_x1, down_x2, u1, u2, tau=tau, lbda=lbda, theta=theta, warps=warps, max_iterations=max_iterations) if ss == 0: return u1, u2, rho up_sample_factor = zfactor ** (ss - 1) up_height, up_width = self.zoom_size(height, width, up_sample_factor) u1 = self.zoom_image(u1, up_height, up_width) / zfactor u2 = self.zoom_image(u2, up_height, up_width) / zfactor def get_loss(self, x1, x2, tau=0.25, # time step lbda=0.15, # weight parameter for the data term theta=0.3, # weight parameter for (u - v)^2 warps=5, # number of warpings per scale zfactor=0.5, # factor for building the image piramid max_scales=5, # maximum number of scales for image piramid max_iterations=5 # maximum number of iterations for optimization ): u1, u2, rho = self.tvnet_flow(x1, x2, tau=tau, lbda=lbda, theta=theta, warps=warps, zfactor=zfactor, max_scales=max_scales, max_iterations=max_iterations) # computing loss u1x, u1y = self.forward_gradient(u1, 'u1') u2x, u2y = self.forward_gradient(u2, 'u2') u1_flat = tf.reshape(u1, (tf.shape(x2)[0], 1, x2.shape[1].value * x2.shape[2].value)) u2_flat = tf.reshape(u2, (tf.shape(x2)[0], 1, x2.shape[1].value * x2.shape[2].value)) x2_warp = self.warp_image(x2, u1_flat, u2_flat) x2_warp = tf.reshape(x2_warp, tf.shape(x2)) loss = lbda * tf.reduce_mean(tf.abs(x2_warp - x1)) + tf.reduce_mean( tf.abs(u1x) + tf.abs(u1y) + tf.abs(u2x) + tf.abs(u2y)) return loss, u1, u2 def MatrixToImage(self,data): from PIL import Image data = data * 255 new_im = Image.fromarray(data.astype(np.uint8)) return new_im