【debug】在 Colab、Kaggle 上向 Hugging Face 上传 datasets、models 失败

问题

TypeError                                 Traceback (most recent call last)
Cell In[23], line 1
----> 1 scraped_ds.push_to_hub("my_data_repo")

File /opt/conda/lib/python3.10/site-packages/datasets/dataset_dict.py:899, in DatasetDict.push_to_hub(self, repo_id, private, token, branch, shard_size, embed_external_files)
    897 logger.warning(f"Pushing split {split} to the Hub.")
    898 # The split=key needs to be removed before merging
--> 899 repo_id, split, uploaded_size, dataset_nbytes = self[split]._push_parquet_shards_to_hub(
    900     repo_id,
    901     split=split,
    902     private=private,
    903     token=token,
    904     branch=branch,
    905     shard_size=shard_size,
    906     embed_external_files=embed_external_files,
    907 )
    908 total_uploaded_size += uploaded_size
    909 total_dataset_nbytes += dataset_nbytes

File /opt/conda/lib/python3.10/site-packages/datasets/arrow_dataset.py:3474, in Dataset._push_parquet_shards_to_hub(self, repo_id, split, private, token, branch, shard_size, embed_external_files)
   3472     shard.to_parquet(buffer)
   3473     uploaded_size += buffer.tell()
-> 3474     _retry(
   3475         api.upload_file,
   3476         func_kwargs=dict(
   3477             path_or_fileobj=buffer.getvalue(),
   3478             path_in_repo=path_in_repo(index),
   3479             repo_id=repo_id,
   3480             token=token,
   3481             repo_type="dataset",
   3482             revision=branch,
   3483             identical_ok=True,
   3484         ),
   3485         exceptions=HTTPError,
   3486         status_codes=[504],
   3487         base_wait_time=2.0,
   3488         max_retries=5,
   3489         max_wait_time=20.0,
   3490     )
   3491 return repo_id, split, uploaded_size, dataset_nbytes

File /opt/conda/lib/python3.10/site-packages/datasets/utils/file_utils.py:330, in _retry(func, func_args, func_kwargs, exceptions, status_codes, max_retries, base_wait_time, max_wait_time)
    328 while True:
    329     try:
--> 330         return func(*func_args, **func_kwargs)
    331     except exceptions as err:
    332         if retry >= max_retries or (status_codes and err.response.status_code not in status_codes):

File /opt/conda/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:118, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
    115 if check_use_auth_token:
    116     kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
--> 118 return fn(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/huggingface_hub/hf_api.py:826, in future_compatible.<locals>._inner(self, *args, **kwargs)
    823     return self.run_as_future(fn, self, *args, **kwargs)
    825 # Otherwise, call the function normally
--> 826 return fn(self, *args, **kwargs)

TypeError: HfApi.upload_file() got an unexpected keyword argument 'identical_ok'

解决方法

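原因:从上面的报错可以看出,已安装的 datasets 在调用 HfApi.upload_file() 时仍会传入 identical_ok 参数,而当前环境中的 huggingface_hub 已不接受该参数,两个库的版本不匹配。将两个库一起更新到最新版本即可: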

!pip install -U datasets huggingface-hub

重启 kernel(点击 factory restart),使更新后的版本生效

  • 4
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值