# Dataset download: https://huggingface.co/datasets/mlfoundations/datacomp_large
import os
from huggingface_hub import snapshot_download
def download_parquet_files(repo_id, output_dir):
"""
Download .parquet files from a Hugging Face dataset repository using snapshot_download.
Args:
- repo_id (str): The ID of the Hugging Face dataset repository.