1. Dataset Introduction
The camera trajectories of RealEstate10K can be downloaded here: RealEstate10K.tgz (720MB)
The data consists of a set of .txt files, one per video clip, specifying the timestamps and camera poses of the frames in that clip. For learning applications, frames can be sampled from the training clips in order to learn, for example, a view synthesis model. In Google's SIGGRAPH 2018 paper Stereo Magnification: Learning view synthesis using multiplane images, for instance, triplets of frames are sampled from each clip during training: two are fed to the prediction model, and the third serves as ground truth for the view synthesis loss used to train the network.
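For illustration, triplet sampling of this kind might look like the following minimal sketch (the function, the frames list, and the max_gap parameter are hypothetical; this is not the paper's actual sampling code):

import random

def sample_triplet(frames, max_gap=10):
    # frames: the frames of one clip, ordered by timestamp (assumes len(frames) > max_gap).
    # Pick two source frames i and j, then a target frame k strictly between them.
    i = random.randrange(0, len(frames) - max_gap)
    j = i + random.randrange(2, max_gap + 1)
    k = random.randrange(i + 1, j)
    return frames[i], frames[j], frames[k]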
This data is made available by Google LLC under a Creative Commons Attribution 4.0 International License.
The data is split into train and test subdirectories, each containing a set of .txt files, one per video clip (roughly 90% of the clips are in train and the remaining 10% in test). Each .txt file has the following format:
<Video URL>
<frame1>
<frame2>
<...>
where each frame line has the following 19 columns:
1. timestamp (int: microseconds since start of video)
2-6. camera intrinsics (float: focal_length_x, focal_length_y, principal_point_x, principal_point_y)
7-19. camera pose (floats forming 3x4 matrix in row-major order)
The camera intrinsics can be organized into a 3x3 matrix K, and the camera pose parameters into a 3x4 matrix P = [R|t], such that the matrix KP maps a (homogeneous) 3D point in the world coordinate system to a (homogeneous) 2D point in the image. (In the released files the intrinsics occupy columns 2-5 and are followed by two unused columns, which is why the parsing code in section 4 reads line[1:7] for the intrinsics and line[7:19] for the 12 pose values.)
The camera intrinsics are expressed in resolution-independent normalized image coordinates, where the top-left corner of the image is (0,0) and the bottom-right corner is (1,1). This allows the intrinsics to be applied to frames at whatever resolution they are stored on disk (or resized to before training) by scaling them according to the image size in pixels. For an image of resolution width x height pixels, the intrinsics matrix at the image's actual scale is

K = | focal_length_x * width   0                         principal_point_x * width  |
    | 0                        focal_length_y * height   principal_point_y * height |
    | 0                        0                         1                          |
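As a concrete illustration, here is a minimal sketch (not code from the dataset release; the function names are chosen for illustration) that builds the pixel-scale K and the pose matrix P from one frame line and projects a world point with K[R|t]:

import numpy as np

def camera_matrices(frame_line, width, height):
    # frame_line: one whitespace-separated line from a RealEstate10K .txt file
    vals = frame_line.split()
    fx, fy, cx, cy = [float(v) for v in vals[1:5]]  # normalized intrinsics
    K = np.array([[fx * width, 0.0,         cx * width],
                  [0.0,        fy * height, cy * height],
                  [0.0,        0.0,         1.0]])
    P = np.array([float(v) for v in vals[7:19]]).reshape(3, 4)  # [R|t], row-major
    return K, P

def project(K, P, point_world):
    # Map a 3D world point to 2D pixel coordinates via K P (homogeneous coordinates)
    p = K @ P @ np.append(point_world, 1.0)
    return p[:2] / p[2]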
2. Dataset Download
The .txt files can be downloaded from the link above; train and test together contain about 80,000 scenes.
Then download the YouTube videos listed in the .txt files and cut them into frames according to the timestamps:
- Copy https://github.com/Findeton/real-state-10k/blob/main/downloadAndProcess.py and place the downloadAndProcess.py file in the same directory as the RealEstate10K folder
- Export your browser's YouTube cookies as cookies.txt and put the file in the current directory
- Create an empty folder named downloaded in the current directory
Environment requirements:
- Access to YouTube
- Python 3.8+; required libraries: cv2 (pip install opencv-python) and yt-dlp (its predecessor youtube-dl appears to be abandoned); both can be installed with pip
Modify the .py file:
- Change line 48 of downloadAndProcess.py from
return_code = call(["youtube-dl", "-f", "bestvideo[height<=480]", videoPathURL, "-o", targetPath, "--cookies", "./cookies.txt" ])
to
return_code = call(["yt-dlp", "-f", "bestvideo", videoPathURL, "-o", targetPath, "--cookies", "./cookies.txt" ])
(this switches to yt-dlp and removes the 480p resolution cap)
Run:
- In the current folder, from cmd:
python .\downloadAndProcess.py
- This starts downloading the entire dataset. The script can be interrupted at any time; download progress is saved automatically.
3. Viewing the Dataset
The transcode folder under the current directory contains the extracted images for each scene.
The downloaded folder under the current directory contains the video for each scene (stored without a file extension).
The camera parameters still have to be parsed yourself; a possible starting point is sketched below.
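The following minimal sketch (a hypothetical helper, not part of the official release) parses one .txt file into per-frame camera records and pairs each record with the corresponding .jpg written by downloadAndProcess.py:

import os
import numpy as np

def load_scene(txt_path, transcode_dir='./transcode/'):
    with open(txt_path) as f:
        video_url = f.readline().rstrip()
        youtube_id = video_url[video_url.find('/watch?v=') + len('/watch?v='):]
        frames = []
        for line in f:
            vals = line.split()
            if not vals:
                continue
            timestamp = int(vals[0])
            frames.append({
                'timestamp': timestamp,  # microseconds since start of video
                'intrinsics': [float(v) for v in vals[1:7]],  # fx, fy, cx, cy + 2 unused
                'pose': np.array([float(v) for v in vals[7:19]]).reshape(3, 4),  # [R|t]
                'img_path': os.path.join(transcode_dir, youtube_id, '{}.jpg'.format(timestamp)),
            })
    return video_url, frames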
4. Code Walkthrough
import cv2
import os
import random
from subprocess import call
import pickle
outputResultPath = './transcode/'  # extracted frames are written here
basePath = './RealEstate10K/'  # location of the train/test txt folders

# set of videos that were not found
notFoundVideosPath = "./downloaded/notFound.pkl"
def loadNotFoundVideos():
    # Return the set of YouTube IDs whose videos could not be downloaded
    if os.path.exists(notFoundVideosPath):
        with open(notFoundVideosPath, 'rb') as f:
            return pickle.load(f)
    else:
        return set()

notFoundVideos = loadNotFoundVideos()  # set of videos that were not found
processedTxtFilesPath = "./processedTxtFiles.pkl"

def loadProcessedTxtFiles():
    # Return the set of txt file names that have been fully processed
    if os.path.exists(processedTxtFilesPath):
        with open(processedTxtFilesPath, 'rb') as f:
            return pickle.load(f)
    else:
        return set()

# These are the txt files that were SUCCESSFULLY processed.
# This is useful to create a "golden" record of the dataset.
processedTxtFiles = loadProcessedTxtFiles()  # names of the fully processed txt files
def downloadVideo(videoPathURL, notFoundVideos):
    """
    Download the video from the URL.
    If the video cannot be found, add it to the not-found set; if it already
    exists on disk, skip the download. Returns error = "DOWNLOAD_ERROR" when
    the video is in notFoundVideos or the download fails, and error = False
    when the file already exists or downloads successfully.
    """
    youtubeIDOffset = videoPathURL.find("/watch?v=") + len('/watch?v=')
    youtubeID = videoPathURL[youtubeIDOffset:]
    targetPath = "./downloaded/{}".format(youtubeID)
    if youtubeID in notFoundVideos:
        return targetPath, "DOWNLOAD_ERROR", notFoundVideos, youtubeID
    if os.path.exists(targetPath):
        print('Skipped {}, warning EXISTS'.format(targetPath))
        return targetPath, False, notFoundVideos, youtubeID
    # return_code = call(["youtube-dl", "-f", "bestvideo[height<=480]", videoPathURL, "-o", targetPath, "--cookies", "./cookies.txt"])
    # return_code = call(["yt-dlp", "-f", "bestvideo[height<=480]", videoPathURL, "-o", targetPath, "--cookies", "./cookies.txt"])
    return_code = call(["yt-dlp", "-f", "bestvideo", videoPathURL, "-o", targetPath, "--cookies", "./cookies.txt"])
    error = False if return_code == 0 else "DOWNLOAD_ERROR"
    if "DOWNLOAD_ERROR" == error:
        # Remember the failure so the video is not retried on the next run
        notFoundVideos.add(youtubeID)
        with open(notFoundVideosPath, 'wb') as f:
            pickle.dump(notFoundVideos, f)
    return targetPath, error, notFoundVideos, youtubeID
def getBestMatchingFrames(frameTimeStamp, case, maxFrameMatchingDistanceInNS=8000):
    # Return the entries of `case` whose timestamp lies within
    # maxFrameMatchingDistanceInNS of frameTimeStamp, sorted by distance
    # (closest first). Despite the parameter name, the timestamps compared
    # here are in microseconds.
    matches = []
    for caseIdx, c in enumerate(case):
        distance = abs(c['timeStamp'] - frameTimeStamp)
        if distance < maxFrameMatchingDistanceInNS:
            matches.append({
                'caseIdx': caseIdx,
                'distance': distance,
            })
    matches.sort(key=lambda x: x['distance'])
    return matches
for rootPath in os.listdir(basePath):
    if 'download' in rootPath:
        continue
    subRootPath = os.path.join(basePath, rootPath)  # path of train or test
    for subPath in os.listdir(subRootPath):
        dataFilePath = os.path.join(subRootPath, subPath)  # path of one .txt file
        case = []
        with open(dataFilePath) as f:
            # read the video URL, then the per-frame timestamps, intrinsics and poses
            videoPathURL = f.readline().rstrip()
            # process all the rest of the lines
            for l in f.readlines():
                line = l.split(' ')
                timeStamp = int(line[0])
                intrinsics = [float(i) for i in line[1:7]]
                pose = [float(i) for i in line[7:19]]
                case.append({
                    'timeStamp': timeStamp,
                    'intrinsics': intrinsics,
                    'pose': pose})
        downloadedVideoPath, error, notFoundVideos, youtubeID = downloadVideo(videoPathURL, notFoundVideos)
        if error != False:
            print('Skipped {}, error {}'.format(downloadedVideoPath, error))
            continue
        # build out the specific frames for the case
        video = cv2.VideoCapture(downloadedVideoPath)
        video.set(cv2.CAP_PROP_POS_MSEC, 0)
        while video.isOpened():
            frameOK, imgFrame = video.read()
            if not frameOK:
                print('video processing complete')
                break
            # current decode position, converted from milliseconds to microseconds
            frameTimeStamp = int(round(video.get(cv2.CAP_PROP_POS_MSEC) * 1000))
            # match within half a frame interval (timestamps are in microseconds)
            matches = getBestMatchingFrames(frameTimeStamp, case, 1e6 / (2 * video.get(cv2.CAP_PROP_FPS)))
            for match in matches:
                caseOffset = match['caseIdx']
                distance = match['distance']
                # match was successful, write frame
                imageOutputDir = os.path.join(outputResultPath, youtubeID)
                if not os.path.exists(imageOutputDir):
                    os.makedirs(imageOutputDir)
                imageOutputPath = os.path.join(imageOutputDir, '{}.jpg'.format(case[caseOffset]['timeStamp']))
                if not os.path.exists(imageOutputPath):
                    print("Writing {} for frame {}, distance {}".format(imageOutputPath, case[caseOffset]['timeStamp'], distance))
                    cv2.imwrite(imageOutputPath, imgFrame)
                case[caseOffset]['imgPath'] = imageOutputPath
        # record this txt file as fully processed and persist the set to disk
        processedTxtFiles.add(subPath)
        with open(processedTxtFilesPath, 'wb') as f:
            pickle.dump(processedTxtFiles, f)
        # caseFileOutputPath = os.path.join(imageOutputDir, 'case.pkl')
        # with open(caseFileOutputPath, 'wb') as f:
        #     pickle.dump(case, f)
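After interrupting the script, the two pickle files it maintains can be inspected to check progress; a minimal sketch, assuming both files already exist:

import pickle

with open("./processedTxtFiles.pkl", 'rb') as f:
    done = pickle.load(f)
with open("./downloaded/notFound.pkl", 'rb') as f:
    missing = pickle.load(f)
print('{} txt files fully processed, {} videos unavailable'.format(len(done), len(missing)))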