Make3d数据集使用方法

最新推荐文章于 2024-05-06 15:35:08 发布

Jamesgender

最新推荐文章于 2024-05-06 15:35:08 发布

阅读量811

点赞数 3

文章标签： pytorch 深度学习 python

本文链接：https://blog.csdn.net/Jamesgender/article/details/134420950

版权

Make3D 是单目深度估计常用的数据集之一。

下载链接：

Make3D --- Range Image Dataset

该数据集一般只用来测试在 KITTI 数据集上训练好的权重的性能，也就是只作为测试集使用，因此只需下载 134 张测试图片及其对应的深度 .mat 文件。

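下载之后得到两个压缩包（解压后为两个文件夹），将它们解压到同一个目录下，然后使用下面这个提取文件名的脚本，生成包含所有测试集文件名的 txt 文件（make3d_test_files.txt）。注意：后面的评估脚本会从数据集根目录读取该 txt 文件，而下面的脚本默认把它写到当前工作目录，因此请在数据集根目录下运行脚本，或在生成后把文件移动到数据集根目录。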

# Collect the names of the Make3D test images (file name without extension).

import glob
import os


def write_test_file_list(image_dir='E:/dataset/make3d/Test134',
                         out_path='make3d_test_files.txt'):
    """Write one image name per line (no directory, no extension) to *out_path*.

    Args:
        image_dir: Directory that is searched for ``*.jpg`` files.
        out_path: Text file to create. The default is relative, so the file
            lands in the current working directory.

    Returns:
        The list of names that were written, in the order they were written.
    """
    # sorted() makes the listing reproducible; glob itself gives no ordering
    # guarantee.
    image_paths = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))

    # os.path handles whichever separator the platform uses, instead of
    # assuming exactly one backslash in the path.
    names = [os.path.splitext(os.path.basename(p))[0] for p in image_paths]

    # 'w' rather than 'a+': re-running must not append duplicate entries.
    with open(out_path, 'w') as f:
        for name in names:
            f.write('{}\n'.format(name))
    return names


if __name__ == '__main__':
    write_test_file_list()

最终数据集目录包含如下内容（Test134 与 Gridlaserdata 两个文件夹，以及 make3d_test_files.txt）：

现在就可以使用验证脚本来验证模型在make3d数据集上的精度了。

# make3d 数据集评估， 以Monodepth2为例
from layers import disp_to_depth
import networks
import cv2
import os 
import torch
import scipy.misc
from scipy import io
import numpy as np
# Locations of the pretrained weights and of the Make3D data.
load_weights_folder = './models/mono_resnet50_640x192'
main_path = 'E:/dataset/make3d'
encoder_path = os.path.join(load_weights_folder, "encoder.pth")
decoder_path = os.path.join(load_weights_folder, "depth.pth")

# The networks are kept on the CPU in this script, so map the checkpoint
# tensors to the CPU while loading. Without map_location, a checkpoint that
# was saved from a GPU cannot be loaded on a machine without CUDA.
encoder_dict = torch.load(encoder_path, map_location='cpu')
# NOTE(review): 50 presumably selects the ResNet depth and False presumably
# disables pretrained initialisation -- confirm against networks.ResnetEncoder.
encoder = networks.ResnetEncoder(50, False)
depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

# Load only the checkpoint entries whose keys exist in the encoder's own
# state dict; any other entry stored in the file is ignored.
model_dict = encoder.state_dict()
encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
depth_decoder.load_state_dict(torch.load(decoder_path, map_location='cpu'))

# Put both networks into evaluation mode before inference.
# To run on a GPU instead, move both networks (and every input tensor fed to
# them) to the device, e.g. with .cuda().
encoder.eval()
depth_decoder.eval()
def compute_errors(gt, pred):
    """Compute four error metrics between ground-truth and predicted depth.

    Args:
        gt: NumPy array of ground-truth values. It is used as a divisor and
            passed to ``log10``, so callers should supply positive values.
        pred: NumPy array of predictions, same shape as ``gt``; also passed
            to ``log10``.

    Returns:
        Tuple ``(abs_rel, sq_rel, rmse, rmse_log)``:
        mean absolute relative error, mean squared relative error,
        root-mean-square error, and root-mean-square error of the
        base-10 logarithms.
    """
    diff = gt - pred
    squared_diff = diff ** 2
    log_diff = np.log10(gt) - np.log10(pred)

    abs_rel = np.mean(np.abs(diff) / gt)
    sq_rel = np.mean(squared_diff / gt)
    rmse = np.sqrt(squared_diff.mean())
    rmse_log = np.sqrt((log_diff ** 2).mean())

    return abs_rel, sq_rel, rmse, rmse_log


# Read the list of test samples. The first four characters of every entry
# are dropped (the "img-" prefix, which is added back when the image path is
# built below). Blank lines are skipped so they cannot turn into bogus paths.
with open(os.path.join(main_path, "make3d_test_files.txt")) as f:
    test_filenames = [line[4:] for line in f.read().splitlines() if line.strip()]


depths_gt = []
images = []
# Number of rows kept by the vertical centre crops of the colour image and of
# the ground-truth depth grid, respectively.
color_new_height = int(1704 / 2)
depth_new_height = 21
for filename in test_filenames:
    mat = io.loadmat(os.path.join(main_path, "Gridlaserdata", "depth_sph_corr-{}.mat".format(filename)))
    # Channel 3 of "Position3DGrid" is the value evaluated as depth below.
    depths_gt.append(mat["Position3DGrid"][:, :, 3])

    image_path = os.path.join(main_path, "Test134", "img-{}.jpg".format(filename))
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals a missing or unreadable file by returning None
        # instead of raising, which would otherwise surface as an obscure
        # "NoneType is not subscriptable" error on the next line.
        raise FileNotFoundError("Could not read image: {}".format(image_path))
    # Keep the middle color_new_height rows.
    # NOTE(review): 2272 is presumably the full image height in pixels -- confirm.
    image = image[int((2272 - color_new_height) / 2):int((2272 + color_new_height) / 2), :, :]
    # Reverse the channel axis (OpenCV loads BGR; the network is fed RGB).
    images.append(image[:, :, ::-1])

# Keep the middle depth_new_height rows of every ground-truth grid.
# NOTE(review): 55 is presumably the number of rows of the depth grid -- confirm.
depths_gt_cropped = [
    x[int((55 - depth_new_height) / 2):int((55 + depth_new_height) / 2), :]
    for x in depths_gt
]

errors = []
with torch.no_grad():
    for input_color, depth_gt in zip(images, depths_gt_cropped):
        # cv2.resize takes (width, height). The weights folder name says
        # 640x192; adjust this size when evaluating other weights.
        input_color = cv2.resize(input_color / 255.0, (640, 192), interpolation=cv2.INTER_NEAREST)
        # HWC -> CHW, plus a leading batch dimension of size one.
        input_color = torch.tensor(input_color, dtype=torch.float).permute(2, 0, 1)[None, :, :, :]
        output = depth_decoder(encoder(input_color))
        # The first value returned by disp_to_depth is treated as a disparity
        # and inverted below to obtain depth.
        # NOTE(review): 0.1 and 100 are presumably the min/max depth bounds --
        # confirm against layers.disp_to_depth.
        pred_disp, _ = disp_to_depth(output[("disp", 0)], 0.1, 100)
        pred_disp = pred_disp.squeeze().cpu().numpy()
        depth_pred = 1 / pred_disp
        # Bring the prediction to the ground-truth resolution
        # (shape is (rows, cols); cv2.resize wants (cols, rows)).
        depth_pred = cv2.resize(depth_pred, depth_gt.shape[::-1], interpolation=cv2.INTER_NEAREST)
        # Evaluate only where the ground truth lies strictly between 0 and 70.
        mask = np.logical_and(depth_gt > 0, depth_gt < 70)
        depth_gt = depth_gt[mask]
        depth_pred = depth_pred[mask]
        # Rescale the prediction so that its median matches the ground-truth
        # median, then cap it at 70.
        depth_pred *= np.median(depth_gt) / np.median(depth_pred)
        depth_pred[depth_pred > 70] = 70
        errors.append(compute_errors(depth_gt, depth_pred))

if not errors:
    # Averaging an empty list would yield NaN and fail confusingly at print time.
    raise RuntimeError("No test samples were evaluated; check make3d_test_files.txt")

# Average every metric over all evaluated images.
mean_errors = np.mean(errors, 0)


print(("{:>8} | " * 4).format("abs_rel", "sq_rel", "rmse", "rmse_log"))
print(("{: 8.3f} , " * 4).format(*mean_errors.tolist()))

得到结果如下：