# Collate every non-label field of the features into `batch`, keyed by field
# name. `first` supplies the field names and a sample value per field, which
# decides how that field is batched.
for k, v in first.items():
    # The original condition also required `not isinstance(v, str)`, which
    # dropped string-valued fields; that check is removed here so string
    # fields are kept in the batch.
    if k in ("label", "label_ids") or v is None:
        continue

    # Gather this field from every feature once; all branches below reuse it.
    values = [f[k] for f in features]

    if isinstance(v, torch.Tensor):
        batch[k] = torch.stack(values)
    elif isinstance(v, np.ndarray):
        # Stack in NumPy first, then convert the single array to a tensor.
        batch[k] = torch.tensor(np.stack(values))
    else:
        try:
            batch[k] = torch.tensor(values)
        except Exception:
            # Values that torch cannot convert to a tensor (presumably the
            # string fields this snippet is meant to keep) are passed through
            # as a plain Python list. `except Exception` is used instead of a
            # bare `except` so KeyboardInterrupt/SystemExit still propagate.
            batch[k] = values
return batch
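HuggingFace 的 data_collator 会检查每个输入项是否为 str,如果是 str 就会将其过滤掉(即原始条件中的 `not isinstance(v, str)`)。
如果我们想将 str 作为模型的输入,就需要取消对 str 的过滤;此时无法转换为 tensor 的字段(例如 str)会以 Python 列表的形式保留在 batch 中。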