Is Someone Speaking? Exploring Long-term Temporal Features for Audio-visual Active Speaker Detection-CSDN博客

本文链接：https://blog.csdn.net/weixin_44809603/article/details/133555557

此博客主要介绍如何跑通此代码。
代码链接 : code
论文链接：paper

主要会出现的问题：

在服务器上直接下载数据集太慢，于是博主在 Windows 系统上写了一段功能相同的下载脚本，先把数据集下载到本地（亲测下载速度比较快）。脚本从 1.txt 中逐行读取要下载的视频文件名，代码如下：

import os
import urllib.request

from tqdm import tqdm

# Script purpose: download every video named in 1.txt from the AVA dataset
# "trainval" S3 prefix into a local directory, showing a per-file progress bar.

# Read the list of video file names, one per line. Blank lines are skipped so
# they do not turn into a request for the bucket prefix itself (and an attempt
# to open the output directory as a file).
with open('1.txt', 'r') as file:
    video_file_names = [line.strip() for line in file if line.strip()]
print(video_file_names)

output_directory = 'downloaded_videos'
os.makedirs(output_directory, exist_ok=True)

# Read in 64 KiB chunks: the videos are large, and 1 KiB reads only add
# per-call overhead without improving progress-bar granularity in practice.
CHUNK_SIZE = 64 * 1024

for video_name in video_file_names:
    video_url = f"https://s3.amazonaws.com/ava-dataset/trainval/{video_name}"
    output_file = os.path.join(output_directory, video_name)

    try:
        # Open the remote resource and the local destination together so both
        # are closed even if the transfer fails part-way. The file handle gets
        # its own name so that `output_file` keeps referring to the path.
        with urllib.request.urlopen(video_url) as response, \
                open(output_file, 'wb') as output_handle:
            # Content-Length may be absent (e.g. chunked transfer); tqdm
            # accepts total=None and then shows a bar without a known total.
            content_length = response.headers.get("Content-Length")
            file_size = int(content_length) if content_length is not None else None

            # Using tqdm as a context manager guarantees the bar is closed
            # even when reading or writing raises.
            with tqdm(total=file_size, unit="B", unit_scale=True,
                      desc=video_name) as progress_bar:
                # Stream the body chunk by chunk, updating the bar as we go.
                while True:
                    data = response.read(CHUNK_SIZE)
                    if not data:
                        break
                    output_handle.write(data)
                    progress_bar.update(len(data))

        print(f"视频 '{video_name}' 已成功下载到 {output_file}")
    except Exception as e:
        # Best-effort batch download: report the failure and continue with the
        # next video instead of aborting the whole run.
        print(f"下载视频 '{video_name}' 时出错: {e}")