代码解读:基于深度学习的单目深度估计(3)
今天接着分析后面的函数get_predicted_depth_region(),
def get_predicted_depth_region(self):
    '''
    Returns the region of a 320x240 image covered by the predicted
    depth map (y0, y1, x0, x1) where y runs the 240-dim and x runs
    the 320-dim.

    The network first crops the original frame down to ``input_size``,
    then loses a further ``target_crop`` pixels to valid convolutions,
    so the prediction corresponds to a centered sub-window of the
    original 320x240 frame.
    '''
    (orig_h, orig_w) = self.orig_input_size  # input before transforms
    (input_h, input_w) = self.input_size     # input after transforms
    dt = self.target_crop  # net output size difference from valid convs
    # Floor division keeps the offsets integer pixel coordinates; the
    # original `/ 2` relied on Python 2 integer division and would
    # yield floats under Python 3.
    off_h = (orig_h - input_h + dt) // 2
    off_w = (orig_w - input_w + dt) // 2
    return (off_h, off_h + input_h,
            off_w, off_w + input_w)
不纠结具体细节,可以知道这个函数计算的是预测深度图在原始 320×240 输入图像中所覆盖的区域,返回 (y0, y1, x0, x1) 四个边界坐标。
再来看下一段函数,
def define_machine(self):
    '''
    Build the symbolic computation graph for the full network.

    Sets up the input/output sizes, creates the Theano input variables
    (images, depths, masks) with debug test values, and wires them
    through the three sub-stacks: the AlexNet-style imagenet stack, the
    coarse stack, and the fine stack. All local symbols are captured
    into ``self.vars`` via ``MachinePart(locals())``, so local names
    here are part of the downstream interface — do not rename them.
    '''
    self.orig_input_size = (240, 320)  # raw frame size (h, w)
    self.input_size = (228, 304)  # size fed to the net (after cropping)
    # output size is read from the config file, section 'full2'
    self.output_size = self.conf.geteval('full2', 'output_size')
    (input_h, input_w) = self.input_size
    (output_h, output_w) = self.output_size
    # The network input is 4x the output resolution (see the 4x
    # downsampling of depths/masks below); target_crop is the number of
    # input pixels not covered when the output is scaled back up by 4.
    self.target_crop = input_h - output_h * 4
    assert self.target_crop == input_w - output_w * 4
    self.define_meta()
    # input vars
    images = T.tensor4('images')
    depths = T.tensor3('depths')
    masks = T.tensor3('masks')
    test_values = self.make_test_values()
    images.tag.test_value = test_values['images']
    depths.tag.test_value = test_values['depths']
    masks.tag.test_value = test_values['masks']
    x0 = images
    y0 = depths
    m0 = masks
    # downsample depth and mask by 4x
    m0 = m0[:,1::4,1::4]
    y0 = y0[:,1::4,1::4]
    # First half of the coarse network: AlexNet-like layers, so that
    # pretrained ImageNet weights can be reused and fine-tuned.
    self.define_imagenet_stack(x0)
    # pretrained features are rather large, rescale down to nicer range
    imnet_r5 = 0.01 * self.imagenet.r5
    # flatten conv features to (batch, features) for the FC coarse stack
    imnet_feats = imnet_r5.reshape((
        self.bsize, T.prod(imnet_r5.shape[1:])))
    # Second half of the coarse network.
    self.define_coarse_stack(imnet_feats)
    # fine stack
    self.define_fine_stack(x0)
    self.vars = MachinePart(locals())
从这段代码可以了解到:
1,定义以及初始化input_size和output_size,注意输入图片的空间尺寸(长宽)是输出深度图的4倍
2,定义以及初始化输入变量images,depths,masks
3,初始化深度网络各个结构的stack
4,定义图像的meta信息,即图像均值方差,深度图均值方差,以及对数深度图均值方差
接下来就仔细研究define_machine()引用的几个子函数,
先看一个简单的,
def define_meta(self):
    '''
    precomputed means and stdev

    NOTE(review): ``MachinePart(locals())`` presumably captures these
    locals by name, so the variable names below are part of
    ``self.meta``'s attribute interface — do not rename them.
    The *_istd values are precomputed reciprocals so downstream code
    can normalize with a multiply instead of a divide.
    '''
    # just hardcoding for this release, was in meta.mat file
    images_mean = 109.31410628
    images_std = 76.18328376
    images_istd = 1.0 / images_std
    depths_mean = 2.53434899
    depths_std = 1.22576694
    depths_istd = 1.0 / depths_std
    logdepths_mean = 0.82473954
    logdepths_std = 0.45723134
    logdepths_istd = 1.0 / logdepths_std
    self.meta = MachinePart(locals())
很简单,定义了图像均值方差,深度图均值方差,以及对数深度图均值方差
再往下看,
def make_test_values(self):
    '''
    Create random placeholder arrays shaped like the network inputs.

    Returns a dict with float32 arrays: 'images' of shape
    (bsize, 3, input_h, input_w) with values in [0, 255), 'depths' of
    shape (bsize, output_h, output_w) from a standard normal, and
    'masks' of the same shape filled with ones. These are assigned to
    the Theano variables' tag.test_value for debugging.
    '''
    (in_h, in_w) = self.input_size
    (out_h, out_w) = self.output_size
    images_shape = (self.bsize, 3, in_h, in_w)
    depths_shape = (self.bsize, out_h, out_w)
    return {
        'images': (255 * np.random.rand(*images_shape)).astype(np.float32),
        'depths': np.random.randn(*depths_shape).astype(np.float32),
        'masks': np.ones(depths_shape, dtype=np.float32),
    }
看上去不是很容易理解,来看一下什么地方引用了这个函数,在这里:
test_values = self.make_test_values()
images.tag.test_value = test_values['images']
depths.tag.test_value = test_values['depths']
masks.tag.test_value = test_values['masks']
tag.test_value是Theano的调试机制:当开启compute_test_value模式时,Theano会在构图阶段就用这些测试数据检查每一步运算的形状和类型是否匹配,提前暴露错误。make_test_values()的作用就是为各个输入变量生成这些测试数组。
接下来看一下不同网络层的stack定义和初始化,
#在coarse部分,与Alexnet相同的部分
def define_imagenet_stack(self, x0):
    '''
    Build the AlexNet-style convolutional front half of the coarse net.

    x0: symbolic 4-tensor of input images (batch, channel, h, w —
    TODO confirm layout against the caller). Five conv layers with
    interleaved pooling; every local (conv*/pool*/z*/p*/s*/r*) is
    exposed through ``self.imagenet`` via ``MachinePart(locals())``,
    so the local names here are part of the interface — do not rename.
    '''
    # Parenthesized print is valid in both Python 2 and Python 3;
    # the bare `print "..."` statement is a syntax error under 3.
    print("create net")
    conv1 = self.create_unit('imnet_conv1')
    pool1 = self.create_unit('imnet_pool1')
    conv2 = self.create_unit('imnet_conv2')
    pool2 = self.create_unit('imnet_pool2')
    conv3 = self.create_unit('imnet_conv3')
    conv4 = self.create_unit('imnet_conv4')
    conv5 = self.create_unit('imnet_conv5')
    pool5 = self.create_unit('imnet_pool5')
    # roughly center the 0..255 pixel values before the first conv
    z1 = conv1.infer(x0 - 128)
    (p1, s1) = pool1.infer(z1)
    # cmrnorm: presumably cross-map response normalization as in
    # AlexNet's LRN layers — TODO confirm against its definition
    r1 = cmrnorm(relu(p1))
    z2 = conv2.infer(r1)
    (p2, s2) = pool2.infer(z2)
    r2 = cmrnorm(relu(p2))
    z3 = conv3.infer(r2)
    r3 = relu(z3)
    z4 = conv4.infer(r3)
    r4 = relu(z4)
    z5 = conv5.infer(r4)
    (p5, s5) = pool5.infer(z5)
    r5 = relu(p5)
    self.imagenet = MachinePart(locals())
从这段代码可以了解到:
1,用self.create_unit初始化卷积层和池化层
2,然后建立这些卷积层和池化层之间的连接关系,构成与AlexNet前半部分相同的特征提取结构
下次再往后面分析!