transform、_LazyTransformDataset、transform_first 的使用简单介绍

import io
import os
from gluonnlp.data.dataset import Dataset, SimpleDataset

class Dataset(object):
    """Base interface that every dataset is expected to provide.

    Concrete datasets override `__getitem__` (return the i-th element)
    and `__len__` (return the total number of elements).

    .. note:: An mxnet or numpy array can be directly used as a dataset.
    """

    def __getitem__(self, idx):
        raise NotImplementedError

    def __len__(self):
        raise NotImplementedError

    def filter(self, fn):
        """Placeholder: no implementation in this excerpt; returns None."""

    def shard(self, num_shards, index):
        """Placeholder: no implementation in this excerpt; returns None."""

    def take(self, count):
        """Placeholder: no implementation in this excerpt; returns None."""

    def sample(self, sampler):
        """Placeholder: no implementation in this excerpt; returns None."""

    def transform(self, fn, lazy=True):
        """Build a new dataset whose samples are the result of `fn`.

        Parameters
        ----------
        fn : callable
            Function applied to every sample; it receives a sample and
            returns the transformed sample.
        lazy : bool, default True
            Whether the transformation is deferred. When True, each
            sample is transformed on demand. When False, all samples are
            transformed immediately. Note that a stochastic `fn` requires
            lazy=True, otherwise every epoch sees the same results.

        Returns
        -------
        Dataset
            The transformed dataset.
        """
        # `self` is the dataset being wrapped (in the accompanying demo
        # it is a SimpleDataset instance).
        lazy_view = _LazyTransformDataset(self, fn)
        if not lazy:
            # Eager mode: run `fn` over every sample now and keep the results.
            return SimpleDataset([sample for sample in lazy_view])
        return lazy_view

    def transform_first(self, fn, lazy=True):
        """Build a new dataset where only the first element of each
        sample is passed through `fn`.

        Handy when the data should be transformed while the label is
        left untouched.

        Parameters
        ----------
        fn : callable
            Function that receives the first element of a sample and
            returns the transformed element.
        lazy : bool, default True
            When True, each sample is transformed on demand. When False,
            all samples are transformed immediately. Note that a
            stochastic `fn` requires lazy=True, otherwise every epoch
            sees the same results.

        Returns
        -------
        Dataset
            The transformed dataset.
        """
        first_only = _TransformFirstClosure(fn)
        return self.transform(first_only, lazy)

class _LazyTransformDataset(Dataset):
    """Dataset view that applies `fn` to a sample only when it is accessed."""

    def __init__(self, data, fn):
        # Per the original note, `data` may be a plain list or a
        # SimpleDataset instance.
        self._fn = fn
        self._data = data

    def __len__(self):
        return len(self._data)

    def __getitem__(self, idx):
        sample = self._data[idx]
        # Non-tuple samples are handed to `fn` as a single argument;
        # tuple samples are unpacked into separate positional arguments.
        if not isinstance(sample, tuple):
            return self._fn(sample)
        return self._fn(*sample)


class _TransformFirstClosure(object):
    """Callable wrapper that applies `fn` to the first argument only.

    A callable object is used instead of a nested function so that it
    can be pickled.
    """

    def __init__(self, fn):
        self._fn = fn

    def __call__(self, x, *args):
        head = self._fn(x)
        # With extra elements, return a tuple: transformed head followed
        # by the untouched remainder. Otherwise return the bare value.
        return (head,) + args if args else head




if __name__ == '__main__':
    pairs = SimpleDataset([(1, 1), (2, 2)])

    # Lazy transform: each (x, y) tuple is unpacked into the lambda.
    summed = pairs.transform(lambda x, y: x + y, lazy=True)
    wrapped = summed._data
    print(wrapped)
    # Reachable because lazy=True returns the wrapper around the source dataset.
    print(wrapped._data)
    # Indexing cannot stay lazy: the transform runs for this sample.
    print(summed[0])
    print(list(summed))

    # Eager transform of only the first element of each sample.
    bumped = pairs.transform_first(lambda x: x + 1, lazy=False)
    print(list(bumped))

运行结果:

<mxnet.gluon.data.dataset.SimpleDataset object at 0x00000205FE0D8BC8>
[(1, 1), (2, 2)]
2
[2, 4]
[(2, 1), (3, 2)]

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值