1. Dataset Introduction
The camera trajectories of RealEstate10K can be downloaded here: RealEstate10K.tgz (720MB)
The data consists of a set of .txt files, one per video clip, specifying the timestamps and camera poses of the frames in that clip. For learning applications, frames can be sampled from the training clips in order to learn, for example, a view synthesis model. In Google's SIGGRAPH 2018 paper Stereo Magnification: Learning view synthesis using multiplane images, for instance, triplets of frames are sampled from each clip during training: two are fed to the prediction model, and the third serves as ground truth for the view synthesis loss used to train the network.
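For illustration, triplet sampling of this kind might look like the following minimal sketch (the function, the frames list, and the max_gap parameter are hypothetical; this is not the paper's actual sampling code):

import random

def sample_triplet(frames, max_gap=10):
    # frames: the frames of one clip, ordered by timestamp (assumes len(frames) > max_gap).
    # Pick two source frames i and j, then a target frame k strictly between them.
    i = random.randrange(0, len(frames) - max_gap)
    j = i + random.randrange(2, max_gap + 1)
    k = random.randrange(i + 1, j)
    return frames[i], frames[j], frames[k]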
This data is made available by Google LLC under a Creative Commons Attribution 4.0 International License.
The data is split into train and test subdirectories, each containing a set of .txt files, one per video clip (roughly 90% of the clips are in train and the remaining 10% in test). Each .txt file has the following format:
<Video URL>
<frame1>
<frame2>
<...>
where each frame line has the following 19 columns:
1. timestamp (int: microseconds since start of video)
2-6. camera intrinsics (float: focal_length_x, focal_length_y, principal_point_x, principal_point_y)
7-19. camera pose (floats forming 3x4 matrix in row-major order)
The camera intrinsics can be organized into a 3x3 matrix K, and the camera pose parameters into a 3x4 matrix P = [R|t], such that the matrix KP maps a (homogeneous) 3D point in the world coordinate system to a (homogeneous) 2D point in the image. (In the released files the intrinsics occupy columns 2-5 and are followed by two unused columns, which is why the parsing code in section 4 reads line[1:7] for the intrinsics and line[7:19] for the 12 pose values.)
The camera intrinsics are expressed in resolution-independent normalized image coordinates, where the top-left corner of the image is (0,0) and the bottom-right corner is (1,1). This allows the intrinsics to be applied to frames at whatever resolution they are stored on disk (or resized to before training) by scaling them according to the image size in pixels. For an image of resolution width x height pixels, the intrinsics matrix at the image's actual scale is

K = | focal_length_x * width   0                         principal_point_x * width  |
    | 0                        focal_length_y * height   principal_point_y * height |
    | 0                        0                         1                          |
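As a concrete illustration, here is a minimal sketch (not code from the dataset release; the function names are chosen for illustration) that builds the pixel-scale K and the pose matrix P from one frame line and projects a world point with K[R|t]:

import numpy as np

def camera_matrices(frame_line, width, height):
    # frame_line: one whitespace-separated line from a RealEstate10K .txt file
    vals = frame_line.split()
    fx, fy, cx, cy = [float(v) for v in vals[1:5]]  # normalized intrinsics
    K = np.array([[fx * width, 0.0,         cx * width],
                  [0.0,        fy * height, cy * height],
                  [0.0,        0.0,         1.0]])
    P = np.array([float(v) for v in vals[7:19]]).reshape(3, 4)  # [R|t], row-major
    return K, P

def project(K, P, point_world):
    # Map a 3D world point to 2D pixel coordinates via K P (homogeneous coordinates)
    p = K @ P @ np.append(point_world, 1.0)
    return p[:2] / p[2]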
2. Dataset Download
The .txt files can be downloaded from the link above; train and test together contain about 80,000 scenes.
Then download the YouTube videos listed in the .txt files and cut them into frames according to the timestamps:
- Copy https://github.com/Findeton/real-state-10k/blob/main/downloadAndProcess.py and place the downloadAndProcess.py file in the same directory as the RealEstate10K folder
- Export your browser's YouTube cookies as cookies.txt and put the file in the current directory
- Create an empty folder named downloaded in the current directory
Environment requirements:
- Access to YouTube
- Python 3.8+; required libraries: cv2 (pip install opencv-python) and yt-dlp (its predecessor youtube-dl appears to be abandoned); both can be installed with pip
Modify the .py file:
- Change line 48 of downloadAndProcess.py from
return_code = call(["youtube-dl", "-f", "bestvideo[height<=480]", videoPathURL, "-o", targetPath, "--cookies", "./cookies.txt" ])
to
return_code = call(["yt-dlp", "-f", "bestvideo", videoPathURL, "-o", targetPath, "--cookies", "./cookies.txt" ])
(this switches to yt-dlp and removes the 480p resolution cap)
Run:
- In the current folder, from cmd:
python .\downloadAndProcess.py
- This starts downloading the entire dataset. The script can be interrupted at any time; download progress is saved automatically.
3. Viewing the Dataset
The transcode folder under the current directory contains the extracted images for each scene.
The downloaded folder under the current directory contains the video for each scene (stored without a file extension).
The camera parameters still have to be parsed yourself; a possible starting point is sketched below.
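The following minimal sketch (a hypothetical helper, not part of the official release) parses one .txt file into per-frame camera records and pairs each record with the corresponding .jpg written by downloadAndProcess.py:

import os
import numpy as np

def load_scene(txt_path, transcode_dir='./transcode/'):
    with open(txt_path) as f:
        video_url = f.readline().rstrip()
        youtube_id = video_url[video_url.find('/watch?v=') + len('/watch?v='):]
        frames = []
        for line in f:
            vals = line.split()
            if not vals:
                continue
            timestamp = int(vals[0])
            frames.append({
                'timestamp': timestamp,  # microseconds since start of video
                'intrinsics': [float(v) for v in vals[1:7]],  # fx, fy, cx, cy + 2 unused
                'pose': np.array([float(v) for v in vals[7:19]]).reshape(3, 4),  # [R|t]
                'img_path': os.path.join(transcode_dir, youtube_id, '{}.jpg'.format(timestamp)),
            })
    return video_url, frames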
4. Code Walkthrough
import cv2
import os
import random
from subprocess import call
import pickle
outputResultPath = './transcode/'  # extracted frames are written here
basePath = './RealEstate10K/'  # location of the train/test txt folders

# set of videos that were not found
notFoundVideosPath = "./downloaded/notFound.pkl"
def loadNotFoundVideos():
    # Return the set of YouTube IDs whose videos could not be downloaded
    if os.path.exists(notFoundVideosPath):
        with open(notFoundVideosPath, 'rb') as f:
            return pickle.load(f)
    else:
        return set()

notFoundVideos = loadNotFoundVideos()  # set of videos that were not found
processedTxtFilesPath = "./processedTxtFiles.pkl"

def loadProcessedTxtFiles():
    # Return the set of txt file names that have been fully processed
    if os.path.exists(processedTxtFilesPath):
        with open(processedTxtFilesPath, 'rb') as f:
            return pickle.load(f)
    else:
        return set()

# These are the txt files that were SUCCESSFULLY processed.
# This is useful to create a "golden" record of the dataset.
processedTxtFiles = loadProcessedTxtFiles()  # names of the fully processed txt files
def downloadVideo(videoPathURL, notFoundVideos):
    """
    Download the video from the URL.
    If the video cannot be found, add it to the not-found set; if it already
    exists on disk, skip the download. Returns error = "DOWNLOAD_ERROR" when
    the video is in notFoundVideos or the download fails, and error = False
    when the file already exists or downloads successfully.
    """
    youtubeIDOffset = videoPathURL.find("/watch?v=") + len('/watch?v=')
    youtubeID = videoPathURL[youtubeIDOffset:]
    targetPath = "./downloaded/{}".format(youtubeID)
    if youtubeID in notFoundVideos:
        return targetPath, "DOWNLOAD_ERROR", notFoundVideos, youtubeID
    if os.path.exists(targetPath):
        print('Skipped {}, warning EXISTS'.format(targetPath))
        return targetPath, False, notFoundVideos, youtubeID
    # return_code = call(["youtube-dl", "-f", "bestvideo[height<=480]", videoPathURL, "-o", targetPath, "--cookies", "./cookies.txt"])
    # return_code = call(["yt-dlp", "-f", "bestvideo[height<=480]", videoPathURL, "-o", targetPath, "--cookies", "./cookies.txt"])
    return_code = call(["yt-dlp", "-f", "bestvideo", videoPathURL, "-o", targetPath, "--cookies", "./cookies.txt"])
    error = False if return_code == 0 else "DOWNLOAD_ERROR"
    if "DOWNLOAD_ERROR" == error:
        # Remember the failure so the video is not retried on the next run
        notFoundVideos.add(youtubeID)
        with open(notFoundVideosPath, 'wb') as f:
            pickle.dump(notFoundVideos, f)
    return targetPath, error, notFoundVideos, youtubeID
def getBestMatchingFrames(frameTimeStamp, case, maxFrameMatchingDistanceInNS=8000):
    # Return the entries of `case` whose timestamp lies within
    # maxFrameMatchingDistanceInNS of frameTimeStamp, sorted by distance
    # (closest first). Despite the parameter name, the timestamps compared
    # here are in microseconds.
    matches = []
    for caseIdx, c in enumerate(case):
        distance = abs(c['timeStamp'] - frameTimeStamp)
        if distance < maxFrameMatchingDistanceInNS:
            matches.append({
                'caseIdx': caseIdx,
                'distance': distance,
            })
    matches.sort(key=lambda x: x['distance'])
    return matches
for rootPath in os.listdir(basePath):
    if 'download' in rootPath:
        continue
    subRootPath = os.path.join(basePath, rootPath)  # path of train or test
    for subPath in os.listdir(subRootPath):
        dataFilePath = os.path.join(subRootPath, subPath)  # path of one .txt file
        case = []
        with open(dataFilePath) as f:
            # read the video URL, then the per-frame timestamps, intrinsics and poses
            videoPathURL = f.readline().rstrip()
            # process all the rest of the lines
            for l in f.readlines():
                line = l.split(' ')
                timeStamp = int(line[0])
                intrinsics = [float(i) for i in line[1:7]]
                pose = [float(i) for i in line[7:19]]
                case.append({
                    'timeStamp': timeStamp,
                    'intrinsics': intrinsics,
                    'pose': pose})
        downloadedVideoPath, error, notFoundVideos, youtubeID = downloadVideo(videoPathURL, notFoundVideos)
        if error != False:
            print('Skipped {}, error {}'.format(downloadedVideoPath, error))
            continue
        # build out the specific frames for the case
        video = cv2.VideoCapture(downloadedVideoPath)
        video.set(cv2.CAP_PROP_POS_MSEC, 0)
        while video.isOpened():
            frameOK, imgFrame = video.read()
            if not frameOK:
                print('video processing complete')
                break
            # current decode position, converted from milliseconds to microseconds
            frameTimeStamp = int(round(video.get(cv2.CAP_PROP_POS_MSEC) * 1000))
            # match within half a frame interval (timestamps are in microseconds)
            matches = getBestMatchingFrames(frameTimeStamp, case, 1e6 / (2 * video.get(cv2.CAP_PROP_FPS)))
            for match in matches:
                caseOffset = match['caseIdx']
                distance = match['distance']
                # match was successful, write frame
                imageOutputDir = os.path.join(outputResultPath, youtubeID)
                if not os.path.exists(imageOutputDir):
                    os.makedirs(imageOutputDir)
                imageOutputPath = os.path.join(imageOutputDir, '{}.jpg'.format(case[caseOffset]['timeStamp']))
                if not os.path.exists(imageOutputPath):
                    print("Writing {} for frame {}, distance {}".format(imageOutputPath, case[caseOffset]['timeStamp'], distance))
                    cv2.imwrite(imageOutputPath, imgFrame)
                case[caseOffset]['imgPath'] = imageOutputPath
        # record this txt file as fully processed and persist the set to disk
        processedTxtFiles.add(subPath)
        with open(processedTxtFilesPath, 'wb') as f:
            pickle.dump(processedTxtFiles, f)
        # caseFileOutputPath = os.path.join(imageOutputDir, 'case.pkl')
        # with open(caseFileOutputPath, 'wb') as f:
        #     pickle.dump(case, f)
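After interrupting the script, the two pickle files it maintains can be inspected to check progress; a minimal sketch, assuming both files already exist:

import pickle

with open("./processedTxtFiles.pkl", 'rb') as f:
    done = pickle.load(f)
with open("./downloaded/notFound.pkl", 'rb') as f:
    missing = pickle.load(f)
print('{} txt files fully processed, {} videos unavailable'.format(len(done), len(missing)))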