- The __getitem__ method
If a class defines the __getitem__ method, then when an instance is indexed with the [] operator, its __getitem__ method is called.
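A minimal sketch (this Squares class is an illustrative assumption, not part of the notes):

class Squares:
    def __getitem__(self, idx):
        # s[idx] is translated into s.__getitem__(idx)
        return idx * idx

s = Squares()
print(s[4])  # 16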
-
torch.utils.data.DataLoader
Parameters:
(1) dataset: the dataset to load from.
(2) batch_size: how many samples per batch.
(3) shuffle: whether to reshuffle the data at every epoch.
(4) sampler: the strategy for drawing samples; mutually exclusive with shuffle.
(5) batch_sampler: returns a batch of indices at a time; mutually exclusive with batch_size, shuffle, sampler, and drop_last.
(6) collate_fn: merges a list of samples to form a mini-batch.
(7) num_workers: a value greater than 0 loads data with that many worker subprocesses, which can speed up loading; 0 loads data in the main process.
(8) pin_memory: copies tensors into CUDA pinned (page-locked) host memory before returning them, which speeds up transfer to the GPU; it does not itself move the data onto the GPU.
(9) timeout: if positive, an error is raised when a batch cannot be collected from the workers within this many seconds.
Usage:

for data in loader:
    data = data.cuda()
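For example, a loader can be built like this (a minimal sketch; the TensorDataset and the sizes are illustrative assumptions):

import torch
from torch.utils.data import DataLoader, TensorDataset

# 100 samples with 3 features each, plus binary labels
dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))
loader = DataLoader(dataset, batch_size=8, shuffle=True,
                    num_workers=2, pin_memory=True)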
Source:

class DataLoader(object):
    """
    Data loader. Combines a dataset and a sampler, and provides
    single- or multi-process iterators over the dataset.

    Arguments:
        dataset (Dataset): dataset from which to load the data.
        batch_size (int, optional): how many samples per batch to load (default: 1).
        shuffle (bool, optional): set to ``True`` to have the data reshuffled
            at every epoch (default: False).
        sampler (Sampler, optional): defines the strategy to draw samples from
            the dataset. If specified, ``shuffle`` must be False.
        batch_sampler (Sampler, optional): like sampler, but returns a batch of
            indices at a time. Mutually exclusive with batch_size, shuffle,
            sampler, and drop_last.
        num_workers (int, optional): how many subprocesses to use for data
            loading. 0 means that the data will be loaded in the main process.
            (default: 0)
        collate_fn (callable, optional): merges a list of samples to form a
            mini-batch.
        pin_memory (bool, optional): If ``True``, the data loader will copy
            tensors into CUDA pinned memory before returning them.
        drop_last (bool, optional): set to ``True`` to drop the last incomplete
            batch, if the dataset size is not divisible by the batch size.
            If ``False`` and the size of dataset is not divisible by the batch
            size, then the last batch will be smaller. (default: False)
        timeout (numeric, optional): if positive, the timeout value for
            collecting a batch from workers. Should always be non-negative.
            (default: 0)
        worker_init_fn (callable, optional): If not None, this will be called
            on each worker subprocess with the worker id as input, after
            seeding and before data loading. (default: None)
    """

    def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None,
                 batch_sampler=None, num_workers=0, collate_fn=default_collate,
                 pin_memory=False, drop_last=False, timeout=0,
                 worker_init_fn=None):
        self.dataset = dataset
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.collate_fn = collate_fn
        self.pin_memory = pin_memory
        self.drop_last = drop_last
        self.timeout = timeout
        self.worker_init_fn = worker_init_fn

        if timeout < 0:
            raise ValueError('timeout option should be non-negative')

        if batch_sampler is not None:
            if batch_size > 1 or shuffle or sampler is not None or drop_last:
                raise ValueError('batch_sampler is mutually exclusive with '
                                 'batch_size, shuffle, sampler, and drop_last')

        if sampler is not None and shuffle:
            raise ValueError('sampler is mutually exclusive with shuffle')

        if self.num_workers < 0:
            raise ValueError('num_workers cannot be negative; '
                             'use num_workers=0 to disable multiprocessing.')

        if batch_sampler is None:
            if sampler is None:
                if shuffle:
                    sampler = RandomSampler(dataset)
                else:
                    sampler = SequentialSampler(dataset)
            batch_sampler = BatchSampler(sampler, batch_size, drop_last)

        self.sampler = sampler
        self.batch_sampler = batch_sampler

    def __iter__(self):
        return DataLoaderIter(self)

    def __len__(self):
        return len(self.batch_sampler)
If several tensors need to be used together, subclass Data.Dataset and override its methods:
class MyDataSet(Data.Dataset):
    def __init__(self, enc_inputs, dec_inputs, dec_outputs):
        super(MyDataSet, self).__init__()
        self.enc_inputs = enc_inputs
        self.dec_inputs = dec_inputs
        self.dec_outputs = dec_outputs

    def __len__(self):
        return self.enc_inputs.shape[0]

    def __getitem__(self, idx):
        return self.enc_inputs[idx], self.dec_inputs[idx], self.dec_outputs[idx]

loader = Data.DataLoader(MyDataSet(enc_inputs, dec_inputs, dec_outputs), 2, True)
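A runnable version of this pattern (the random tensors and their shapes are illustrative assumptions):

import torch
import torch.utils.data as Data

enc_inputs = torch.randn(6, 5)    # 6 samples, source length 5
dec_inputs = torch.randn(6, 4)    # 6 samples, target length 4
dec_outputs = torch.randn(6, 4)

loader = Data.DataLoader(MyDataSet(enc_inputs, dec_inputs, dec_outputs), 2, True)
for enc_in, dec_in, dec_out in loader:
    # each iteration yields a shuffled batch of 2 samples
    print(enc_in.shape, dec_in.shape, dec_out.shape)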
-
nn.Module
When building a model, subclass nn.Module:

import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self):
        # boilerplate: always call the parent constructor first
        super(Model, self).__init__()
        # define the layers here
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        # build the model structure: stateless ops can come from F,
        # stateful layers are the ones defined in __init__
        x = F.relu(self.conv1(x))
        # return the final result
        return F.relu(self.conv2(x))
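Calling the instance then runs forward (the input shape here is an assumption that fits the two 5x5 convolutions):

import torch

model = Model()
out = model(torch.randn(1, 1, 28, 28))  # model(x) invokes forward(x)
print(out.shape)  # torch.Size([1, 20, 20, 20])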
-
squeeze() and unsqueeze():
unsqueeze(i): insert a new dimension of size 1 at position i.
squeeze(i): remove dimension i (only if it has size 1; otherwise the tensor is unchanged).
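For example:

>>> import torch
>>> x = torch.zeros(3, 4)
>>> x.unsqueeze(0).shape
torch.Size([1, 3, 4])
>>> x.unsqueeze(0).squeeze(0).shape
torch.Size([3, 4])
-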
Slicing: b = a[i:j:s]
Takes elements from index i up to j-1 with step s.
So b = a[0::2] takes every other element from index 0 to the end.
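For example:

>>> a = [0, 1, 2, 3, 4, 5]
>>> a[1:5:2]
[1, 3]
>>> a[0::2]
[0, 2, 4]
-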
expand()
Returns a view of the tensor expanded along some dimensions. expand() does not allocate new memory; it only creates a new view on the existing tensor, in which a dimension of size 1 is broadcast to a larger size.

>>> import torch
>>> x = torch.tensor([1, 2, 3])
>>> x.expand(2, 3)
tensor([[1, 2, 3],
        [1, 2, 3]])
>>> x = torch.randn(2, 1, 1, 4)
>>> x.expand(-1, 2, 3, -1).size()   # -1 keeps that dimension's size
torch.Size([2, 2, 3, 4])
-
repeat()
Repeats the tensor along the given dimensions. Unlike expand(), this function copies the tensor's data.

>>> import torch
>>> x = torch.tensor([1, 2, 3])
>>> x.repeat(3, 2)
tensor([[1, 2, 3, 1, 2, 3],
        [1, 2, 3, 1, 2, 3],
        [1, 2, 3, 1, 2, 3]])
-
masked_fill_()
masked_fill_(mask, value)
A masking operation: fills the elements of the tensor with value at every position where mask is True (1). The shape of mask must be broadcastable with the shape of the tensor being filled.
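For example (filling with 5 here; in attention code the fill value is usually a large negative number):

>>> import torch
>>> t = torch.zeros(2, 3)
>>> mask = torch.tensor([[1, 0, 1], [0, 1, 0]], dtype=torch.bool)
>>> t.masked_fill_(mask, 5.0)
tensor([[5., 0., 5.],
        [0., 5., 0.]])
-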
np.triu()
Returns the upper triangle of a matrix: elements below the k-th diagonal are zeroed.

>>> import numpy as np
>>> np.triu([[1,2,3],[4,5,6],[7,8,9],[10,11,12]], -1)
array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 0,  8,  9],
       [ 0,  0, 12]])
>>> np.triu([[1,2,3],[4,5,6],[7,8,9],[10,11,12]], 1)
array([[0, 2, 3],
       [0, 0, 6],
       [0, 0, 0],
       [0, 0, 0]])
>>> np.triu([[1,2,3],[4,5,6],[7,8,9],[10,11,12]], 0)
array([[1, 2, 3],
       [0, 5, 6],
       [0, 0, 9],
       [0, 0, 0]])
-
torch.gt()
torch.gt(a, b) compares each element of a against the corresponding element of b (strictly greater than): the result is 1 where a > b and 0 otherwise. Here a is a Tensor; b can be a Tensor of the same size as a, or a number.
>>> import torch
>>> a = torch.randn(2, 4)
>>> a
tensor([[-0.5466,  0.9203, -1.3220, -0.7948],
        [ 2.0300,  1.3090, -0.5527, -0.1326]])
>>> b = torch.randn(2, 4)
>>> b
tensor([[-0.0160, -0.3129, -1.0287,  0.5962],
        [ 0.3191,  0.7988,  1.4888, -0.3341]])
>>> torch.gt(a, b)  # positions where a's elements are greater than b's
tensor([[0, 1, 0, 0],
        [1, 1, 0, 1]], dtype=torch.uint8)