Localisation net
输入 batch_images(shape: [b,h,w,c])，输出 batch_theta(shape: [b,2,3])；具体结构取决于所选用的网络，这里不再赘述。
Grid generator 和 Sampler
实现如下：
import tensorflow as tf
import numpy as np
H:\anaconda3\envs\tf\lib\site-packages\scipy\__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.3
warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
def affine_grid(batch_theta, batch_output_shape):
    """Build a batch of sampling grids from 2x3 affine matrices.

    A base grid of normalized output pixel-centre coordinates is generated,
    every point is extended to the homogeneous vector ``(h, w, 1)`` and then
    multiplied by each matrix in ``batch_theta``.

    Coordinates are row-first: column 0 of a theta row multiplies the height
    coordinate, column 1 the width coordinate and column 2 is the
    translation. Channel 0 of the result is the transformed height
    coordinate and channel 1 the transformed width coordinate.

    Args:
        batch_theta: tensor of shape [b, 2, 3] (presumably float32, like the
            grid generated here -- confirm against the localisation net).
        batch_output_shape: sequence ``[b, oh, ow, c]``. Only ``oh`` and
            ``ow`` are read; the batch size of the result comes from
            ``batch_theta``.

    Returns:
        Tensor of shape [b, oh, ow, 2] holding the transformed (h, w)
        coordinates for every output pixel.
    """
    oh = batch_output_shape[1]
    ow = batch_output_shape[2]

    # The centres of n evenly spaced pixels span [-(1 - 1/n), 1 - 1/n].
    h_extent = 1 - 1 / oh
    w_extent = 1 - 1 / ow
    h_coords = tf.cast(tf.linspace(-h_extent, h_extent, oh), dtype=tf.float32)
    w_coords = tf.cast(tf.linspace(-w_extent, w_extent, ow), dtype=tf.float32)

    h_mesh, w_mesh = tf.meshgrid(h_coords, w_coords, indexing='ij')
    # Homogeneous coordinates (h, w, 1), shape [oh, ow, 3].
    position_hw1 = tf.stack([h_mesh, w_mesh, tf.ones_like(h_mesh)], axis=-1)

    # The base grid is identical for every batch element, so let einsum
    # broadcast it over ``b`` instead of tiling it b times and transposing
    # theta: out[b, h, w, n] = sum_x grid[h, w, x] * theta[b, n, x].
    return tf.einsum('hwx,bnx->bhwn', position_hw1, batch_theta)
def grid_sample(batch_input, batch_affine_grid):
    """Bilinearly sample ``batch_input`` at the locations of a sampling grid.

    The grid holds normalized (h, w) coordinates. Along each axis the range
    ``[-(1 - 1/o), 1 - 1/o]`` (``o`` being the grid's own size on that axis)
    is mapped linearly onto the input pixel indices ``[0, size - 1]``.

    Each output pixel blends the four input pixels surrounding its sampling
    location. Interpolation weights come from the fractional part of the
    location, so a sample that lands exactly on the last row or column
    returns that pixel instead of zero. Corner pixels that fall outside the
    input contribute zero, so locations beyond the image fade to zero.

    Args:
        batch_input: tensor of shape [b, ih, iw, c].
        batch_affine_grid: tensor of shape [b, oh, ow, 2]; channel 0 is the
            height coordinate and channel 1 the width coordinate.

    Returns:
        float32 tensor of shape [b, oh, ow, c].
    """
    input_shape = tf.shape(batch_input)
    ih, iw = input_shape[1], input_shape[2]
    grid_shape = tf.shape(batch_affine_grid)
    oh, ow = grid_shape[1], grid_shape[2]

    # Half-width of the normalized coordinate range on each axis, computed
    # in float64 and then narrowed to float32.
    out_size = tf.cast(tf.stack([oh, ow]), tf.float64)
    extent = tf.cast(1.0 - 1.0 / out_size, tf.float32)
    # A one-pixel output axis has zero extent; fall back to [-1, 1] so the
    # mapping stays finite (coordinate 0 -> image centre) instead of 0/0.
    extent = tf.where(extent > 0, extent, tf.ones_like(extent))
    max_index = tf.cast(tf.stack([ih - 1, iw - 1]), tf.float32)

    # Normalized coordinates -> fractional pixel indices.
    coords = (batch_affine_grid + extent) / (2.0 * extent) * max_index
    h = coords[..., 0:1]
    w = coords[..., 1:2]

    h_floor = tf.floor(h)
    w_floor = tf.floor(w)
    # Fractional offsets in [0, 1). They are taken from the unclipped floor
    # so the weights of each corner pair always sum to one.
    dh = h - h_floor
    dw = w - w_floor

    h1 = tf.cast(h_floor, tf.int32)
    w1 = tf.cast(w_floor, tf.int32)
    h2 = h1 + 1
    w2 = w1 + 1

    def corner(rows, cols, weight):
        # Weighted contribution of one neighbouring pixel; zero when that
        # neighbour lies outside the input image.
        in_bounds = (rows >= 0) & (rows <= ih - 1) & (cols >= 0) & (cols <= iw - 1)
        index = tf.concat(
            [tf.clip_by_value(rows, 0, ih - 1), tf.clip_by_value(cols, 0, iw - 1)],
            axis=-1,
        )
        values = tf.cast(tf.gather_nd(batch_input, index, batch_dims=1), dtype=tf.float32)
        return values * weight * tf.cast(in_bounds, tf.float32)

    return (
        corner(h1, w1, (1.0 - dh) * (1.0 - dw))
        + corner(h1, w2, (1.0 - dh) * dw)
        + corner(h2, w1, dh * (1.0 - dw))
        + corner(h2, w2, dh * dw)
    )
# Read the sample picture from disk and turn it into an array.
image = tf.keras.utils.img_to_array(tf.keras.utils.load_img('000005.jpg'))

# Repeat the single picture along a new leading batch axis.
batch_nums = 64
batch_image = tf.tile(image[tf.newaxis], [batch_nums, 1, 1, 1])

# Notebook display expressions: preview the picture, then the batch shape.
tf.keras.utils.array_to_img(image)
tf.shape(batch_image)
<tf.Tensor: shape=(4,), dtype=int32, numpy=array([ 64, 375, 500, 3])>
# Affine matrix that scales both normalized coordinates by 0.5 with no
# translation.
thetas = tf.constant([[0.5, 0., 0.], [0., 0.5, 0.]], dtype=tf.float32)
out_h = 375
out_w = 500

# One copy of the matrix per batch element, then build the grid and sample.
batch_thetas = tf.tile(thetas[tf.newaxis], [batch_nums, 1, 1])
r = affine_grid(batch_thetas, [batch_nums, out_h, out_w, 3])
r2 = grid_sample(batch_image, r)

# Notebook display expressions: first transformed picture, then result shape.
img = tf.keras.utils.array_to_img(r2[0])
img
r2.shape
TensorShape([64, 375, 500, 3])