从 Hugging Face 上下载指定文件（csv 或 txt）

Jcy_ooo

已于 2024-07-01 17:55:38 修改

阅读量209

点赞数 1

文章标签： python、计算机视觉、人工智能、深度学习

于 2024-06-27 15:40:19 首次发布

本文链接：https://blog.csdn.net/mathstudent/article/details/140015264

版权

import os
import token
import pandas as pd
from huggingface_hub import snapshot_download
from huggingface_hub import hf_hub_download

import time  # local import: only the retry back-off below needs it

# Route Hub traffic through the hf-mirror.com mirror.
# NOTE: huggingface_hub reads HF_ENDPOINT when it is imported, so assigning it
# here (after the imports above) is too late to change the library default.
# The endpoint is therefore also passed explicitly to every download call.
HF_MIRROR = "https://hf-mirror.com"
os.environ["HF_ENDPOINT"] = HF_MIRROR

dataset_name = "allenai/objaverse"
# Header-less CSV: column 0 is the folder under glbs/, column 1 is the object
# uid (the .glb file name without its suffix).
kiui_uids = pd.read_csv("kiuisobj_v1_merged_80K.csv", header=None)
num = len(kiui_uids)

# Give up on a file after this many attempts instead of retrying forever:
# a permanent error (e.g. a 404 for a uid that is not in the repo) would
# otherwise hang the whole run. Re-running the script picks up where it left
# off, because already-downloaded files are not fetched again.
MAX_ATTEMPTS = 5
failed_glbs = []

for glbs_id, uid in zip(kiui_uids[0], kiui_uids[1]):
    glbs_name = uid + ".glb"
    name_all = f"glbs/{glbs_id}/{glbs_name}"
    print(f'{name_all} start!')
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            hf_hub_download(repo_id=dataset_name, filename=name_all,
                repo_type="dataset",
                local_dir="./objaverse",
                local_dir_use_symlinks=False, resume_download=True,
                endpoint=HF_MIRROR)
        except Exception as e:
            print('wrong')
            print(e)
            if attempt < MAX_ATTEMPTS:
                # Exponential back-off so a flaky mirror is not hammered.
                time.sleep(min(2 ** attempt, 30))
        else:
            print(f'{name_all} get!')
            break
    else:
        # All attempts failed: remember the file and move on to the next one.
        failed_glbs.append(name_all)
        print(f'{name_all} failed after {MAX_ATTEMPTS} attempts, skipped')

if failed_glbs:
    print(f'{len(failed_glbs)} file(s) could not be downloaded:')
    for failed_name in failed_glbs:
        print(f'  {failed_name}')
    
    
import time  # local import: only the retry back-off below needs it

# Auxiliary repo entries fetched in addition to the meshes.
other_file = ["lvis-annotations.json.gz", "object-paths.json.gz", "metadata"]
# "metadata" is a folder of per-shard files (metadata/<shard>.json.gz), not a
# single file, so hf_hub_download() can never succeed on it. It is fetched
# with snapshot_download() restricted to that folder instead.
folder_entries = {"metadata"}
# huggingface_hub reads HF_ENDPOINT at import time, so the value set by this
# script is passed explicitly (None falls back to the library default).
mirror_endpoint = os.environ.get("HF_ENDPOINT")
# Bounded retries: a permanent failure must not hang the script forever.
max_attempts = 5

for file_name in other_file:
    print(f'{file_name} start!')
    for attempt in range(1, max_attempts + 1):
        try:
            if file_name in folder_entries:
                snapshot_download(repo_id=dataset_name,
                    repo_type="dataset",
                    allow_patterns=f"{file_name}/*",
                    local_dir="./objaverse",
                    local_dir_use_symlinks=False, resume_download=True,
                    endpoint=mirror_endpoint)
            else:
                hf_hub_download(repo_id=dataset_name, filename=file_name,
                    repo_type="dataset",
                    local_dir="./objaverse",
                    local_dir_use_symlinks=False, resume_download=True,
                    endpoint=mirror_endpoint)
        except Exception as e:
            # `except Exception` (not a bare `except`) lets Ctrl-C stop the
            # script; the error is reported instead of silently swallowed.
            print(f'{file_name} download wrong!')
            print(e)
            if attempt < max_attempts:
                time.sleep(min(2 ** attempt, 30))
        else:
            print(f'{file_name} get!')
            break
    else:
        print(f'{file_name} failed after {max_attempts} attempts, skipped')

import os
import token
import pandas as pd
from huggingface_hub import snapshot_download
from huggingface_hub import hf_hub_download
import json
 
import time  # local import: only the retry back-off below needs it

# Route Hub traffic through the hf-mirror.com mirror.
# NOTE: huggingface_hub reads HF_ENDPOINT when it is imported, so assigning it
# here (after the imports above) is too late to change the library default.
# The endpoint is therefore also passed explicitly to every download call.
HF_MIRROR = "https://hf-mirror.com"
os.environ["HF_ENDPOINT"] = HF_MIRROR

dataset_name = "allenai/objaverse"
# Header-less CSV: column 0 is the folder under glbs/ (also the metadata
# shard name), column 1 is the object uid.
kiui_uids = pd.read_csv("kiuisobj_v1_merged_80K.csv", header=None)
filter_json = r"/root/autodl-tmp/Wonder3D/data_lists/lvis_uids_filter_by_vertex.json"
with open(filter_json, 'r') as f:
    flist = json.load(f)
# Build the lookup set once: testing membership against the raw JSON list
# would be a linear scan for every CSV row.
# Assumes the JSON holds hashable uids (strings) -- TODO confirm.
allowed_uids = set(flist)

num = len(kiui_uids)

# Number of objects to collect.
# NOTE(review): the output files written later are named "..._100..." while
# this limit is 3 -- confirm which value is intended.
numall = 3
numi = 0
idx_list = []
glbs_id_list = []
# Give up on an object after this many attempts instead of retrying forever.
MAX_ATTEMPTS = 5

for glbs_id, idx in zip(kiui_uids[0], kiui_uids[1]):
    if numi >= numall:
        break
    if idx not in allowed_uids:
        continue

    glbs_name = idx + ".glb"
    name_all = f"glbs/{glbs_id}/{glbs_name}"
    mate_all = f"metadata/{glbs_id}.json.gz"
    print(f'{name_all} start!')
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            # The mesh itself.
            hf_hub_download(repo_id=dataset_name, filename=name_all,
                repo_type="dataset",
                local_dir="./objaverse_subset",
                local_dir_use_symlinks=False, resume_download=True,
                endpoint=HF_MIRROR)
            # The metadata shard that the mesh belongs to.
            hf_hub_download(repo_id=dataset_name, filename=mate_all,
                repo_type="dataset",
                local_dir="./objaverse_subset",
                local_dir_use_symlinks=False, resume_download=True,
                endpoint=HF_MIRROR)
        except Exception as e:
            print('wrong')
            print(e)
            if attempt < MAX_ATTEMPTS:
                # Exponential back-off so a flaky mirror is not hammered.
                time.sleep(min(2 ** attempt, 30))
        else:
            # Record the object only once both files are on disk, so the
            # lists written out later never reference a missing download.
            numi += 1
            idx_list.append(idx)
            glbs_id_list.append(glbs_id)
            print(f'{name_all} get!')
            break
    else:
        print(f'{name_all} failed after {MAX_ATTEMPTS} attempts, skipped')
    
    
import time  # local import: only the retry back-off below needs it

# Repo-level annotation files fetched in addition to the selected meshes.
other_file = ["lvis-annotations.json.gz", "object-paths.json.gz"]
# huggingface_hub reads HF_ENDPOINT at import time, so the value set by this
# script is passed explicitly (None falls back to the library default).
mirror_endpoint = os.environ.get("HF_ENDPOINT")
# Bounded retries: a permanent failure must not hang the script forever.
max_attempts = 5

for file_name in other_file:
    print(f'{file_name} start!')
    for attempt in range(1, max_attempts + 1):
        try:
            hf_hub_download(repo_id=dataset_name, filename=file_name,
                repo_type="dataset",
                local_dir="./objaverse_subset",
                local_dir_use_symlinks=False, resume_download=True,
                endpoint=mirror_endpoint)
        except Exception as e:
            # `except Exception` (not a bare `except`) lets Ctrl-C stop the
            # script; the error is reported instead of silently swallowed.
            print(f'{file_name} download wrong!')
            print(e)
            if attempt < max_attempts:
                time.sleep(min(2 ** attempt, 30))
        else:
            print(f'{file_name} get!')
            break
    else:
        print(f'{file_name} failed after {max_attempts} attempts, skipped')
    
    
# Output locations: the selected object uids, and the glbs/ folder name that
# each of those uids was downloaded from.
filter_json_new = r"/root/autodl-tmp/Wonder3D/data_lists/lvis_uids_filter_by_vertex_100.json"
glb_json_new = r"/root/autodl-tmp/Wonder3D/data_lists/lvis_uids_filter_by_vertex_100glb.json"

# Serialise each list to its own JSON file, uid list first.
for out_path, payload in ((filter_json_new, idx_list), (glb_json_new, glbs_id_list)):
    with open(out_path, 'w') as f:
        f.write(json.dumps(payload))

运行前需要先在 shell 中设置镜像地址（huggingface_hub 在被 import 时就会读取 HF_ENDPOINT，脚本里在 import 之后再设置 os.environ 可能不会生效）：

export HF_ENDPOINT=https://hf-mirror.com