# Sometimes data must be stored locally, but a single saved DataLoader has a size limit.
import math
import os
import tempfile

import numpy as np
import torch
from torch.utils.data import DataLoader

# Maximum bytes per saved chunk: 3.8 GiB, leaving headroom under a 4 GiB cap.
# (The original `1024**3.8` computed a meaningless power, not a byte count.)
LIMIT = int(3.8 * 1024**3)
SAVE_PATH = ''      # path prefix for the saved dataloaders (default: current directory)
BATCH_SIZE = 128


def build_chunked_dataloaders(data, limit=LIMIT, batchsize=BATCH_SIZE, save_path=SAVE_PATH):
    """Pad `data` to a multiple of `batchsize`, split it into chunks of at most
    `limit` bytes, and save one shuffled DataLoader per chunk via torch.save.

    Parameters
    ----------
    data : np.ndarray
        Array with samples along axis 0.
    limit : int
        Maximum number of bytes per saved chunk.
    batchsize : int
        DataLoader batch size; rows are padded (by repeating the leading rows)
        so the total row count is a multiple of this.
    save_path : str
        Path prefix; chunk ``x`` is written to ``f"{save_path}{x}dataloader.pth"``
        (same naming scheme as the original script).

    Returns
    -------
    list[str]
        Paths of the saved files, in chunk order.
    """
    n_rows = data.shape[0]
    remainder = n_rows % batchsize
    if remainder:
        # Pad with copies of the first rows so every batch can be full.
        # (The original referenced an undefined `tmp` here and never stored
        # the padded array back.)
        pad = data[: batchsize - remainder].copy()
        data = np.vstack((data, pad))

    # Number of chunks needed so each stays under the byte limit.
    # Use `nbytes` (bytes), not `size` (element count), to match `limit`'s unit.
    num_chunk = math.ceil(data.nbytes / limit)
    rows_per_chunk = data.shape[0] // num_chunk

    paths = []
    for x in range(num_chunk):
        start = x * rows_per_chunk
        # The last chunk runs to the end so no trailing rows are dropped
        # when the row count is not an exact multiple of `num_chunk`.
        stop = data.shape[0] if x == num_chunk - 1 else start + rows_per_chunk
        loader = DataLoader(data[start:stop], shuffle=True,
                            batch_size=batchsize, drop_last=False)
        path = ''.join([save_path, str(x), 'dataloader.pth'])
        torch.save(loader, path)
        paths.append(path)
    return paths


if __name__ == '__main__':
    # Example from the original script: ~6.9 GB of float64 data,
    # split into chunks under the 3.8 GiB limit.
    data = np.ones((180000, 60, 80), dtype='float64')
    build_chunked_dataloaders(data)