tvnet学习

最新推荐文章于 2022-10-16 21:51:29 发布

鹿小闹

最新推荐文章于 2022-10-16 21:51:29 发布

阅读量948

点赞数

分类专栏： Study 文章标签： tvnet

本文链接：https://blog.csdn.net/qq_36271653/article/details/81272229

版权

Study 专栏收录该内容

15 篇文章 0 订阅

订阅专栏

时间：20180723-20180729

背景：学习 https://github.com/LijieFan/tvnet 的源码

一、demo

import os
import cv2
import numpy as np
import tensorflow as tf
import scipy.io as sio
from  tvnet import TVNet

# Demo: estimate optical flow between two frames with TVNet and store the
# result as result/result.mat.

# tf.app.flags parses command-line arguments; each DEFINE_* call takes the
# flag name, its default value and a help string.
flags = tf.app.flags
flags.DEFINE_integer("scale", 5, " TVNet scale [3]")
flags.DEFINE_integer("warp", 5, " TVNet warp [1]")
flags.DEFINE_integer("iteration", 50, " TVNet iteration [10]")
flags.DEFINE_string("gpu", '0', " gpu to use [0]")
FLAGS = flags.FLAGS

scale = FLAGS.scale
warp = FLAGS.warp
iteration = FLAGS.iteration
# A negative gpu id leaves CUDA_VISIBLE_DEVICES untouched.
if int(FLAGS.gpu) > -1:
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

print ('TVNet Params:\n scale: %d\n warp: %d\n iteration: %d\nUsing gpu: %s'% (scale, warp, iteration, FLAGS.gpu))

# Load the two input frames.
img1_path = 'frame/img1.png'
img2_path = 'frame/img2.png'
img1 = cv2.imread(img1_path)
img2 = cv2.imread(img2_path)
# cv2.imread signals a missing/unreadable file by returning None rather than
# raising, so fail here with a clear message instead of on `.shape` below.
if img1 is None:
    raise IOError('could not read image: %s' % img1_path)
if img2 is None:
    raise IOError('could not read image: %s' % img2_path)
# Both placeholders below are built from img1's shape, so the frames must agree.
if img1.shape != img2.shape:
    raise ValueError('input frames differ in shape: %s vs %s' % (img1.shape, img2.shape))
h, w, c = img1.shape

# Build the model: one placeholder per frame, each of shape 1 x h x w x 3.
x1 = tf.placeholder(shape=[1, h, w, 3], dtype=tf.float32)
x2 = tf.placeholder(shape=[1, h, w, 3], dtype=tf.float32)
tvnet = TVNet()
u1, u2, rho = tvnet.tvnet_flow(x1, x2, max_scales=scale,
                               warps=warp,
                               max_iterations=iteration)

# Run the graph; the context manager closes the session when done.
session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True),
                                allow_soft_placement=True)
with tf.Session(config=session_config) as sess:
    sess.run(tf.global_variables_initializer())
    u1_np, u2_np = sess.run([u1, u2],
                            feed_dict={x1: img1[np.newaxis, ...], x2: img2[np.newaxis, ...]})

# Pack the two flow components into a single h x w x 2 array.
u1_np = np.squeeze(u1_np)
u2_np = np.squeeze(u2_np)
flow_mat = np.zeros([h, w, 2])
flow_mat[:, :, 0] = u1_np
flow_mat[:, :, 1] = u2_np

# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs('result', exist_ok=True)
res_path = os.path.join('result', 'result.mat')  # result/result.mat
sio.savemat(res_path, {'flow': flow_mat})  # stored under the key 'flow'

二、tvnet（尚未完全注释完：中间有一些公式我还没弄懂，暂时卡住了，打算下周继续深究。）

import numpy as np
import tensorflow as tf
import spatial_transformer


class TVNet(object):
    GRAD_IS_ZERO = 1e-12

    def __init__(self):
        pass

    def grey_scale_image(self, x):
        """Collapse a 3-channel, rank-4 image tensor to a single grey channel.

        The conversion is a fixed (non-trainable, bias-free) 1x1 convolution
        whose weights for channels 0, 1, 2 are 0.114, 0.587, 0.299; the
        result is rounded down with ``tf.floor``.

        NOTE(review): these weights applied in this order look like luma
        coefficients for BGR input (as produced by cv2.imread), although the
        assertion message says RGB -- confirm the intended channel order.
        """
        assert len(x.shape) == 4
        assert x.shape[-1].value == 3, 'number of channels must be 3 (i.e. RGB)'

        channel_weights = [[0.114], [0.587], [0.299]]
        weighted_sum = tf.layers.conv2d(
            inputs=x, filters=1, kernel_size=[1, 1], padding='same',
            kernel_initializer=tf.constant_initializer(channel_weights),
            use_bias=False, trainable=False)

        return tf.floor(weighted_sum)

    def normalize_images(self, x1, x2):
        """Jointly rescale two image batches with a shared min/max.

        For each sample the minimum and maximum are taken over every
        non-leading axis of both ``x1`` and ``x2``; each image is then mapped
        through ``255 * (x - min) / (max - min)``.  Where ``max - min`` is not
        positive the input is passed through unchanged.

        Returns:
            Tuple ``(x1_norm, x2_norm)``.
        """
        rank = len(x1.shape)
        # Reduce over everything except the leading axis.
        per_sample_axes = list(range(1, rank))

        x1_low = tf.reduce_min(x1, axis=per_sample_axes)
        x1_high = tf.reduce_max(x1, axis=per_sample_axes)

        x2_low = tf.reduce_min(x2, axis=per_sample_axes)
        x2_high = tf.reduce_max(x2, axis=per_sample_axes)

        low = tf.minimum(x1_low, x2_low)
        high = tf.maximum(x1_high, x2_high)

        span = high - low

        # [-1, 1, 1, ...]: lets the per-sample statistics broadcast against the images.
        broadcast_shape = [-1 if axis == 0 else 1 for axis in range(rank)]
        low_b = tf.reshape(low, broadcast_shape)
        span_b = tf.reshape(span, broadcast_shape)

        rescaled = []
        for image in (x1, x2):
            # tf.where picks the rescaled image where the condition holds and
            # the untouched input elsewhere.
            rescaled.append(tf.where(span > 0, 255. * (image - low_b) / span_b, image))

        return rescaled[0], rescaled[1]

    def gaussian_smooth(self, x):
        """Smooth a rank-4 tensor with a fixed 5x5 convolution kernel.

        The kernel is hard-coded, symmetric and peaks at its centre; the
        convolution is non-trainable, bias-free and uses 'same' padding, with
        as many output filters as ``x`` has channels.

        NOTE(review): the initializer holds exactly 25 values, so this
        presumably expects single-channel input -- confirm.
        """
        assert len(x.shape) == 4

        kernel_5x5 = [[0.000874, 0.006976, 0.01386, 0.006976, 0.000874],
                      [0.006976, 0.0557, 0.110656, 0.0557, 0.006976],
                      [0.01386, 0.110656, 0.219833, 0.110656, 0.01386],
                      [0.006976, 0.0557, 0.110656, 0.0557, 0.006976],
                      [0.000874, 0.006976, 0.01386, 0.006976, 0.000874]]
        channels = x.shape[-1].value

        return tf.layers.conv2d(
            inputs=x, filters=channels, kernel_size=[5, 5], padding='same',
            kernel_initializer=tf.constant_initializer(kernel_5x5),
            use_bias=False, trainable=False)

    def warp_image(self, x, u, v):
        """Resample ``x`` with per-pixel offsets ``(u, v)``.

        ``u`` is divided by the size of axis 2 of ``x`` and ``v`` by the size
        of axis 1, each then multiplied by 2; the two are concatenated along
        axis 1 and handed to ``spatial_transformer.transformer`` together
        with the original (axis 1, axis 2) output size.

        NOTE(review): the scaling suggests the transformer works in
        normalised coordinates -- confirm against spatial_transformer.

        Args:
            x: rank-4 tensor to warp.
            u: rank-3 offsets scaled by the axis-2 size of ``x``.
            v: rank-3 offsets scaled by the axis-1 size of ``x``.
        """
        assert len(x.shape) == 4
        assert len(u.shape) == 3
        assert len(v.shape) == 3

        height = x.shape[1].value
        width = x.shape[2].value

        u_scaled = u / width * 2
        v_scaled = v / height * 2
        offsets = tf.concat(axis=1, values=[u_scaled, v_scaled])

        return spatial_transformer.transformer(x, offsets, (height, width))

    def centered_gradient(self, x, name):
        """Centred finite differences of ``x`` along axis 2 and axis 1.

        Interior values come from fixed (non-trainable) convolutions with the
        kernel ``[-0.5, 0, 0.5]``; the first/last column and row are replaced
        by half the difference of the two outermost columns/rows.

        Args:
            x: rank-4 tensor.
            name: prefix for the two convolution layers
                (``<name>_diff_x`` and ``<name>_diff_y``).

        Returns:
            Tuple ``(diff_x, diff_y)``.
        """
        assert len(x.shape) == 4

        height = x.shape[1].value
        width = x.shape[2].value
        channels = x.shape[3].value

        def column(index):
            # One column of x (axis 2) at the given index.
            return tf.slice(x, [0, 0, index, 0], [-1, height, 1, channels])

        def row(index):
            # One row of x (axis 1) at the given index.
            return tf.slice(x, [0, index, 0, 0], [-1, 1, width, channels])

        with tf.variable_scope('centered_gradient'):
            conv_x = tf.layers.conv2d(
                x, channels, [1, 3], padding='same',
                kernel_initializer=tf.constant_initializer([[-0.5, 0, 0.5]]),
                use_bias=False, name=name + '_diff_x', trainable=False)

            conv_y = tf.layers.conv2d(
                x, channels, [3, 1], padding='same',
                kernel_initializer=tf.constant_initializer([[-0.5], [0], [0.5]]),
                use_bias=False, name=name + '_diff_y', trainable=False)

            # Boundary handling along axis 2: keep the interior of the
            # convolution and substitute the two edge columns.
            left_edge = 0.5 * (column(1) - column(0))
            right_edge = 0.5 * (column(width - 1) - column(width - 2))
            interior_x = tf.slice(conv_x, begin=[0, 0, 1, 0],
                                  size=[-1, height, width - 2, channels])
            diff_x = tf.concat(axis=2, values=[left_edge, interior_x, right_edge])

            # Same treatment along axis 1.
            top_edge = 0.5 * (row(1) - row(0))
            bottom_edge = 0.5 * (row(height - 1) - row(height - 2))
            interior_y = tf.slice(conv_y, begin=[0, 1, 0, 0],
                                  size=[-1, height - 2, width, channels])
            diff_y = tf.concat(axis=1, values=[top_edge, interior_y, bottom_edge])

        return diff_x, diff_y

    def forward_gradient(self, x, name):
        """Differences of ``x`` along axis 2 and axis 1 with a ``[-1, 1]`` kernel.

        Both convolutions are created with ``trainable=True``, so ``[-1, 1]``
        is only the initial value of their kernels.  The last column of the
        axis-2 result and the last row of the axis-1 result are replaced by
        zeros.

        Args:
            x: rank-4 tensor.
            name: prefix for the two convolution layers
                (``<name>_diff_x`` and ``<name>_diff_y``).

        Returns:
            Tuple ``(diff_x, diff_y)``.
        """
        assert len(x.shape) == 4

        height = x.shape[1].value
        width = x.shape[2].value
        channels = x.shape[3].value

        with tf.variable_scope('forward_gradient'):
            conv_x = tf.layers.conv2d(
                x, channels, [1, 2], padding='same',
                kernel_initializer=tf.constant_initializer([[-1, 1]]),
                use_bias=False, name=name + '_diff_x', trainable=True)

            conv_y = tf.layers.conv2d(
                x, channels, [2, 1], padding='same',
                kernel_initializer=tf.constant_initializer([[-1], [1]]),
                use_bias=False, name=name + '_diff_y', trainable=True)

            # Boundary handling: drop the last column and append zeros instead.
            kept_cols = tf.slice(conv_x, begin=[0, 0, 0, 0],
                                 size=[-1, height, width - 1, channels])
            zero_col = tf.zeros([tf.shape(x)[0], height, 1, channels], dtype=tf.float32)
            diff_x = tf.concat(axis=2, values=[kept_cols, zero_col])

            # Same for the last row.
            kept_rows = tf.slice(conv_y, begin=[0, 0, 0, 0],
                                 size=[-1, height - 1, width, channels])
            zero_row = tf.zeros([tf.shape(x)[0], 1, width, channels], dtype=tf.float32)
            diff_y = tf.concat(axis=1, values=[kept_rows, zero_row])

        return diff_x, diff_y

    def divergence(self, x, y, name):
        """Sum of an axis-2 difference of ``x`` and an axis-1 difference of ``y``.

        Each input is first shifted by one position (a zero column is
        prepended to ``x`` and its last column dropped; likewise a zero row
        for ``y``) and then convolved with a ``[-1, 1]`` kernel.  The
        convolutions are created with ``trainable=True``.

        Args:
            x: rank-4 tensor differenced along axis 2.
            y: tensor differenced along axis 1.
            name: prefix for the two convolution layers
                (``<name>_diff_x`` and ``<name>_diff_y``).
        """
        assert len(x.shape) == 4

        x_height, x_width, x_channels = (x.shape[1].value, x.shape[2].value, x.shape[3].value)
        y_height, y_width, y_channels = (y.shape[1].value, y.shape[2].value, y.shape[3].value)

        with tf.variable_scope('divergence'):
            # Shift x one step along axis 2, filling the first column with zeros.
            x_kept = tf.slice(x, begin=[0, 0, 0, 0],
                              size=[-1, x_height, x_width - 1, x_channels])
            zero_col = tf.zeros([tf.shape(x)[0], x_height, 1, x_channels], dtype=tf.float32)
            x_shifted = tf.concat(axis=2, values=[zero_col, x_kept])

            # Shift y one step along axis 1, filling the first row with zeros.
            y_kept = tf.slice(y, begin=[0, 0, 0, 0],
                              size=[-1, y_height - 1, y_width, y_channels])
            zero_row = tf.zeros([tf.shape(y)[0], 1, y_width, y_channels], dtype=tf.float32)
            y_shifted = tf.concat(axis=1, values=[zero_row, y_kept])

            dx = tf.layers.conv2d(
                x_shifted, x_channels, [1, 2], padding='same',
                kernel_initializer=tf.constant_initializer([[-1, 1]]),
                use_bias=False, name=name + '_diff_x', trainable=True)

            dy = tf.layers.conv2d(
                y_shifted, y_channels, [2, 1], padding='same',
                kernel_initializer=tf.constant_initializer([[-1], [1]]),
                use_bias=False, name=name + '_diff_y', trainable=True)

        return dx + dy

    def zoom_size(self, height, width, factor):
        """Return ``(height, width)`` scaled by ``factor`` as integers.

        Each extent is multiplied by ``factor``, 0.5 is added and the result
        is truncated with ``int``.
        """
        def scaled(extent):
            return int(float(extent) * factor + 0.5)

        return scaled(height), scaled(width)

    def zoom_image(self, x, new_height, new_width):
        """Resample a rank-4 tensor to ``new_height`` x ``new_width``.

        The resampling is delegated to ``spatial_transformer.transformer``
        with an all-zero offset field, and the result is reshaped to
        ``[batch, new_height, new_width, channels]``.
        """
        assert len(x.shape) == 4

        batch = tf.shape(x)[0]
        zero_offsets = tf.zeros((batch, 2, new_height * new_width))
        resampled = spatial_transformer.transformer(x, zero_offsets, (new_height, new_width))

        target_shape = [batch, new_height, new_width, x.shape[-1].value]
        return tf.reshape(resampled, target_shape)

    def dual_tvl1_optic_flow(self, x1, x2, u1, u2,
                             tau=0.25,  # time step
                             lbda=0.15,  # weight parameter for the data term
                             theta=0.3,  # weight parameter for (u - v)^2
                             warps=5,  # number of warpings per scale
                             max_iterations=5  # maximum number of iterations for optimization
                             ):
        """Refine the flow ``(u1, u2)`` between ``x1`` and ``x2`` at one scale.

        Runs ``warps`` outer warping steps; each one warps ``x2`` and its
        centred gradients by the current flow and then performs
        ``max_iterations`` inner updates (thresholding, divergence update and
        update of the auxiliary variables ``p11, p12, p21, p22``).  The loops
        are unrolled in Python, so every step adds its own ops and variable
        scope (``warping<i>/iter<j>``) to the graph.

        Args:
            x1, x2: the two images (same shape; ``x2`` is the one warped).
            u1, u2: initial flow components, reshaped to
                ``(batch, 1, H * W)`` of ``x2`` before warping.

        Returns:
            Tuple ``(u1, u2, rho)`` from the last inner iteration.

        NOTE(review): ``rho`` is only assigned inside the inner loop, so the
        final ``return`` raises if ``warps`` or ``max_iterations`` is 0.
        """
        l_t = lbda * theta
        taut = tau / theta

        # Gradients of the un-warped second image; warped again in every outer step.
        diff2_x, diff2_y = self.centered_gradient(x2, 'x2')

        # Auxiliary variables start at zero (tensors are immutable, so sharing
        # one zeros tensor between the four names is safe).
        p11 = p12 = p21 = p22 = tf.zeros_like(x1)

        for warpings in range(warps):
            with tf.variable_scope('warping%d' % (warpings,)):
                # warp_image expects rank-3 offsets, hence the flattening.
                u1_flat = tf.reshape(u1, (tf.shape(x2)[0], 1, x2.shape[1].value * x2.shape[2].value))
                u2_flat = tf.reshape(u2, (tf.shape(x2)[0], 1, x2.shape[1].value * x2.shape[2].value))

                x2_warp = self.warp_image(x2, u1_flat, u2_flat)
                x2_warp = tf.reshape(x2_warp, tf.shape(x2))

                diff2_x_warp = self.warp_image(diff2_x, u1_flat, u2_flat)
                diff2_x_warp = tf.reshape(diff2_x_warp, tf.shape(diff2_x))

                diff2_y_warp = self.warp_image(diff2_y, u1_flat, u2_flat)
                diff2_y_warp = tf.reshape(diff2_y_warp, tf.shape(diff2_y))

                diff2_x_sq = tf.square(diff2_x_warp)
                diff2_y_sq = tf.square(diff2_y_warp)

                # Squared gradient magnitude, offset by GRAD_IS_ZERO so the
                # division by grad below never hits exactly zero.
                grad = diff2_x_sq + diff2_y_sq + self.GRAD_IS_ZERO

                # Part of rho that stays constant during the inner iterations
                # (uses the flow at the start of this warping step).
                rho_c = x2_warp - diff2_x_warp * u1 - diff2_y_warp * u2 - x1

                for ii in range(max_iterations):
                    with tf.variable_scope('iter%d' % (ii,)):
                        rho = rho_c + diff2_x_warp * u1 + diff2_y_warp * u2 + self.GRAD_IS_ZERO;

                        # Thresholding: three mutually exclusive cases depending
                        # on where rho lies relative to +/- l_t * grad.
                        masks1 = rho < -l_t * grad
                        d1_1 = tf.where(masks1, l_t * diff2_x_warp, tf.zeros_like(diff2_x_warp))
                        d2_1 = tf.where(masks1, l_t * diff2_y_warp, tf.zeros_like(diff2_y_warp))

                        masks2 = rho > l_t * grad
                        d1_2 = tf.where(masks2, -l_t * diff2_x_warp, tf.zeros_like(diff2_x_warp))
                        d2_2 = tf.where(masks2, -l_t * diff2_y_warp, tf.zeros_like(diff2_y_warp))

                        # Remaining pixels, excluding those where grad does not
                        # exceed GRAD_IS_ZERO.
                        masks3 = (~masks1) & (~masks2) & (grad > self.GRAD_IS_ZERO)
                        d1_3 = tf.where(masks3, -rho / grad * diff2_x_warp, tf.zeros_like(diff2_x_warp))
                        d2_3 = tf.where(masks3, -rho / grad * diff2_y_warp, tf.zeros_like(diff2_y_warp))

                        # Only one of the three corrections is non-zero per pixel.
                        v1 = d1_1 + d1_2 + d1_3 + u1
                        v2 = d2_1 + d2_2 + d2_3 + u2

                        # Flow update from v and the divergence of the auxiliary variables.
                        u1 = v1 + theta * self.divergence(p11, p12, 'div_p1')
                        u2 = v2 + theta * self.divergence(p21, p22, 'div_p2')

                        u1x, u1y = self.forward_gradient(u1, 'u1')
                        u2x, u2y = self.forward_gradient(u2, 'u2')

                        # Update of the auxiliary variables; GRAD_IS_ZERO keeps
                        # the argument of sqrt strictly positive.
                        p11 = (p11 + taut * u1x) / (
                            1.0 + taut * tf.sqrt(tf.square(u1x) + tf.square(u1y) + self.GRAD_IS_ZERO));
                        p12 = (p12 + taut * u1y) / (
                            1.0 + taut * tf.sqrt(tf.square(u1x) + tf.square(u1y) + self.GRAD_IS_ZERO));
                        p21 = (p21 + taut * u2x) / (
                            1.0 + taut * tf.sqrt(tf.square(u2x) + tf.square(u2y) + self.GRAD_IS_ZERO));
                        p22 = (p22 + taut * u2y) / (
                            1.0 + taut * tf.sqrt(tf.square(u2x) + tf.square(u2y) + self.GRAD_IS_ZERO));

        return u1, u2, rho

    def tvnet_flow(self, x1, x2,
                    tau=0.25,  # time step
                    lbda=0.15,  # weight parameter for the data term
                    theta=0.3,  # weight parameter for (u - v)^2
                    warps=5,  # number of warpings per scale
                    zfactor=0.5,  # factor for building the image pyramid
                    max_scales=5,  # maximum number of scales for the image pyramid
                    max_iterations=5  # maximum number of iterations for optimization
                    ):
        """Build the coarse-to-fine flow estimation graph for two image batches.

        Both inputs are converted to grey scale, jointly normalised and
        smoothed.  The flow is then estimated on an image pyramid, starting
        at the coarsest scale with a zero-initialised flow variable and
        passing the up-sampled result (divided by ``zfactor``) on to the next
        finer scale.

        Args:
            x1, x2: image tensors with identical static shapes; axis -3 is
                read as height and axis -2 as width.
            tau, lbda, theta, warps, max_iterations: forwarded unchanged to
                ``dual_tvl1_optic_flow``.
            zfactor: size ratio between consecutive pyramid levels.
            max_scales: upper bound on the number of pyramid levels.

        Returns:
            Tuple ``(u1, u2, rho)`` computed at full resolution (scale 0).
        """
        # The two inputs must agree in every static dimension.
        for i in range(len(x1.shape)):
            assert x1.shape[i].value == x2.shape[i].value

        zfactor = np.float32(zfactor)

        height = x1.shape[-3].value
        width = x1.shape[-2].value

        # Number of pyramid levels the image size allows, capped by max_scales.
        # The size-based bound is a float: without truncation range() below
        # raises TypeError (and `ss == n_scales - 1` never matches) whenever
        # that bound is the smaller one.  Clamping to >= 1 guarantees the loop
        # runs, so the function cannot fall through and return None.
        n_scales = 1 + np.log(np.sqrt(height ** 2 + width ** 2) / 4.0) / np.log(1 / zfactor)
        n_scales = max(1, int(min(n_scales, max_scales)))

        with tf.variable_scope('tvl1_flow'):
            grey_x1 = self.grey_scale_image(x1)
            grey_x2 = self.grey_scale_image(x2)
            norm_imgs = self.normalize_images(grey_x1, grey_x2)

            smooth_x1 = self.gaussian_smooth(norm_imgs[0])
            smooth_x2 = self.gaussian_smooth(norm_imgs[1])

            # Walk the pyramid from the coarsest level down to level 0.
            for ss in range(n_scales - 1, -1, -1):
                with tf.variable_scope('scale%d' % ss):
                    down_sample_factor = zfactor ** ss
                    down_height, down_width = self.zoom_size(height, width, down_sample_factor)

                    if ss == n_scales - 1:
                        # Coarsest level: start from a zero flow variable,
                        # tiled to the run-time batch size.
                        u1 = tf.get_variable('u1', shape=[1, down_height, down_width, 1], dtype=tf.float32,
                                             initializer=tf.zeros_initializer)
                        u2 = tf.get_variable('u2', shape=[1, down_height, down_width, 1], dtype=tf.float32,
                                             initializer=tf.zeros_initializer)
                        u1 = tf.tile(u1, [tf.shape(smooth_x1)[0], 1, 1, 1])
                        u2 = tf.tile(u2, [tf.shape(smooth_x1)[0], 1, 1, 1])

                    down_x1 = self.zoom_image(smooth_x1, down_height, down_width)
                    down_x2 = self.zoom_image(smooth_x2, down_height, down_width)

                    u1, u2, rho = self.dual_tvl1_optic_flow(down_x1, down_x2, u1, u2,
                                                            tau=tau, lbda=lbda, theta=theta, warps=warps,
                                                            max_iterations=max_iterations)

                    if ss == 0:
                        return u1, u2, rho

                    # Resample the flow to the next finer level and rescale
                    # its magnitude to that level's pixel units.
                    up_sample_factor = zfactor ** (ss - 1)
                    up_height, up_width = self.zoom_size(height, width, up_sample_factor)
                    u1 = self.zoom_image(u1, up_height, up_width) / zfactor
                    u2 = self.zoom_image(u2, up_height, up_width) / zfactor

    def get_loss(self, x1, x2,
                 tau=0.25,  # time step
                 lbda=0.15,  # weight parameter for the data term
                 theta=0.3,  # weight parameter for (u - v)^2
                 warps=5,  # number of warpings per scale
                 zfactor=0.5,  # factor for building the image piramid
                 max_scales=5,  # maximum number of scales for image piramid
                 max_iterations=5  # maximum number of iterations for optimization
                 ):
        """Build the flow graph and a scalar loss for it.

        The loss is ``lbda`` times the mean absolute difference between
        ``x2`` warped by the estimated flow and ``x1``, plus the mean of the
        summed absolute forward gradients of both flow components.

        Returns:
            Tuple ``(loss, u1, u2)``.
        """
        flow_options = dict(tau=tau, lbda=lbda, theta=theta, warps=warps,
                            zfactor=zfactor, max_scales=max_scales,
                            max_iterations=max_iterations)
        u1, u2, _ = self.tvnet_flow(x1, x2, **flow_options)

        # Smoothness part: forward gradients of both flow components.
        grad_u1_x, grad_u1_y = self.forward_gradient(u1, 'u1')
        grad_u2_x, grad_u2_y = self.forward_gradient(u2, 'u2')

        # warp_image expects rank-3 offsets of shape (batch, 1, H * W).
        pixel_count = x2.shape[1].value * x2.shape[2].value
        u1_flat = tf.reshape(u1, (tf.shape(x2)[0], 1, pixel_count))
        u2_flat = tf.reshape(u2, (tf.shape(x2)[0], 1, pixel_count))

        warped_x2 = tf.reshape(self.warp_image(x2, u1_flat, u2_flat), tf.shape(x2))

        data_term = lbda * tf.reduce_mean(tf.abs(warped_x2 - x1))
        smoothness_term = tf.reduce_mean(
            tf.abs(grad_u1_x) + tf.abs(grad_u1_y) + tf.abs(grad_u2_x) + tf.abs(grad_u2_y))
        loss = data_term + smoothness_term

        return loss, u1, u2

    def MatrixToImage(self, data):
        """Convert an array to a PIL image.

        The array is multiplied by 255 and cast to ``uint8`` before being
        handed to ``Image.fromarray``.

        NOTE(review): presumably ``data`` is expected to lie in [0, 1];
        values outside that range are not clipped before the cast -- confirm.
        """
        from PIL import Image

        scaled = data * 255
        as_uint8 = scaled.astype(np.uint8)
        return Image.fromarray(as_uint8)