作为刚入坑深度学习的小白,我复现的第一篇论文是《Pyramid Stereo Matching Network》,代码已由作者开源,链接:https://github.com/JiaRenChang/PSMNet
代码已大致读懂,这里将部分代码po出,并做简单注释。代码、注释和下面的备注要结合起来看哦。本文的代码只针对KITTI2015,其他训练集没有用到。
数据预处理
讲一下KITTI2015的预处理部分。
dataloader/KITTIloader2015.py
class myImageFloder(data.Dataset):
def __init__(
    self,
    left,
    right,
    left_disparity,
    training,
    loader=default_loader,
    dploader=disparity_loader,
):
    """Store the sample lists, the loader callables and the mode flag.

    Args:
        left: Indexable collection; ``left[index]`` is handed to ``loader``
            in ``__getitem__`` (presumably image file paths -- confirm
            against the caller).
        right: Indexable collection for the right view, indexed in parallel
            with ``left`` and also read through ``loader``.
        left_disparity: Indexable collection; each entry is handed to
            ``dploader`` to obtain the left disparity data.
        training: Truthy selects the training branch of ``__getitem__``.
        loader: Callable used to load the left and right images.
        dploader: Callable used to load the disparity data.
    """
    # Per-sample collections; __getitem__ indexes all three with one index.
    self.left, self.right, self.disp_L = left, right, left_disparity
    # Loader callables are kept so __getitem__ can load each sample lazily.
    self.loader, self.dploader = loader, dploader
    # Mode flag consulted by __getitem__ to choose its processing branch.
    self.training = training
def __getitem__(self, index):
left = self.left[index]
right = self.right[index]
disp_L= self.disp_L[index]
left_img = self.loader(left)
right_img = self.loader(right)
dataL = self.dploader(disp_L)
if self.training:
w, h = left_img.size
th, tw = 256, 512
x1 = random.randint(0, w - tw)
y1 = random.randint(0, h - th)
left_img = left_img.crop((x1, y1, x1 + tw, y1 + th))
right_img = right_img.crop((x1, y1, x1 + tw, y1 + th))
left_img = np.array(left_img, dtype=np.uint8)
right_img = np.array(right_img, dtype=np.uint8)
dataL = np.ascontiguousarray(dataL,dtype=np.float32)/256
dataL = dataL[y1:y1 + th, x1:x1 + tw]
processed = preprocess.get_transform(augment=False)
left_img = processed(left_img)
right_img = processed(right_img)
return left_img, right_img, dataL
else:
"""
w, h = left_img.size
left_img = left_img.crop((w - 1232, h - 368, w, h))
right_img = right_img.crop((w - 1232, h - 368, w, h))
#w1, h1 = left_img.size
dataL = dataL.crop((w - 1232, h - 368, w, h))
dataL = np.ascontiguousarray(dataL, dtype=np.float32)/ 256
processed = preprocess.get_transform(augment=False)
left_img = processed(left_img)
right_img = processed(right_img)
"""
w, h = left_img.size
th, tw = 256, 512
x1 = random.randint(0, w - tw)
y1 = random.randint(0, h - th)
left_img = left_img.crop((x1, y1, x1 + tw, y1 + th))
right_img = right_im