PyTorch基础(二)-数据读取和图像transforms方法

An_ich

已于 2024-01-04 19:52:05 修改

阅读量1.2k

点赞数 24

分类专栏： pytorch基础文章标签： pytorch 人工智能 python

于 2023-12-07 20:07:58 首次发布

本文链接：https://blog.csdn.net/weixin_62891098/article/details/134862635

版权

pytorch基础专栏收录该内容

11 篇文章 1 订阅

订阅专栏

数据读取

torch.utils.data.DataLoader()

# Attribute declarations excerpted from the body of torch.utils.data.DataLoader.
dataset: Dataset[T_co]  # the Dataset object that samples are read from
batch_size: Optional[int]  # number of samples in each batch
num_workers: int  # controls multi-process data loading
pin_memory: bool
drop_last: bool  # whether to drop the final batch when the sample count is not divisible by batch_size
timeout: float
sampler: Union[Sampler, Iterable]
pin_memory_device: str
prefetch_factor: Optional[int]
_iterator : Optional['_BaseDataLoaderIter']
__initialized = False

dataset：Dataset类

batch_size：批次大小

num_workers：读取数据所使用的子进程数（为0时在主进程中读取数据）

drop_last：当样本数不能被batch_size整除时，是否丢弃最后一批次的数据

torch.utils.data.Dataset()

class Dataset(Generic[T_co]):
    """Base class for map-style datasets.

    Subclasses are expected to override ``__getitem__``; the base version
    only raises ``NotImplementedError``. Two datasets can be joined with
    ``+``, which produces a ``ConcatDataset``.
    """

    def __getitem__(self, index) -> T_co:
        """Return the sample at ``index``; must be overridden by subclasses."""
        message = "Subclasses of Dataset should implement __getitem__."
        raise NotImplementedError(message)

    # A batched ``__getitems__(self, indices: List) -> List[T_co]`` is
    # intentionally not defined here: providing it would cause
    # false-positives in the fetcher check performed by
    # torch.utils.data._utils.fetch._MapDatasetFetcher.

    def __add__(self, other: 'Dataset[T_co]') -> 'ConcatDataset[T_co]':
        """Concatenate this dataset with ``other``."""
        parts = [self, other]
        return ConcatDataset(parts)

__getitem__：接收一个索引，返回一个样本。可以使用默认方式加载索引，也可以通过定义类属性的方式自定义自己的索引，然后在__getitem__方法中通过索引加载数据。

class TrainDataset(Dataset):
    """Paired dataset of ``.mat`` files listed in two index files.

    ``<data_path>/r.txt`` lists file names located under ``<data_path>/r/``
    and ``<data_path>/x.txt`` lists file names located under
    ``<data_path>/x/``. Entry ``i`` of the first list is paired with entry
    ``i`` of the second list.
    """

    def __init__(self, data_path):
        """Read both index files and store the resulting path lists.

        Args:
            data_path: directory containing ``r.txt``, ``x.txt`` and the
                ``r/`` and ``x/`` sub-directories.
        """
        self.mat_mix = self._read_file_list(data_path, 'r')
        self.mat_s1 = self._read_file_list(data_path, 'x')

    @staticmethod
    def _read_file_list(data_path, subset):
        """Return the paths listed in ``<data_path>/<subset>.txt``.

        Each non-empty line becomes ``data_path + '/<subset>/' + line``.
        """
        list_file = os.path.join(data_path, subset + '.txt')
        prefix = data_path + '/' + subset + '/'
        paths = []
        # ``with`` guarantees the handle is closed once the list is read.
        with open(list_file, "r") as handle:
            for line in handle:
                # Strip only the line terminator: slicing off the last
                # character would truncate a final line that has no newline.
                name = line.rstrip('\r\n')
                if name:  # ignore blank lines (e.g. trailing empty line)
                    paths.append(prefix + name)
        return paths

    def __len__(self):
        """Return the number of entries listed in ``r.txt``."""
        return len(self.mat_mix)

    def __getitem__(self, item):
        """Load the ``item``-th pair of ``.mat`` files.

        Returns:
            dict with keys ``'r_name'``, ``'r'``, ``'x_name'`` and ``'x'``;
            ``'r'`` and ``'x'`` hold whatever ``scio.loadmat`` returns for
            the two files.
        """
        mat_mix_index = self.mat_mix[item]
        mat_s1_index = self.mat_s1[item]

        mix = scio.loadmat(mat_mix_index)
        s = scio.loadmat(mat_s1_index)
        # Short name = character just before the first '_' of the path,
        # then '_', then everything after the last '_'.
        # NOTE(review): this is computed on the full path, so an underscore
        # inside ``data_path`` changes the result - confirm this is intended.
        x_name = mat_s1_index.split('_')[0][-1] + '_' + mat_s1_index.split('_')[-1]
        r_name = mat_mix_index.split('_')[0][-1] + '_' + mat_mix_index.split('_')[-1]

        sample = {'r_name': r_name, 'r': mix, 'x_name': x_name, 'x': s}
        return sample

数据增强

通过transforms实现对图像数据的增强

# Pipeline with a single step that resizes every image to 224x224.
train_transform = transforms.Compose([transforms.Resize((224, 224))])

原始图片（224，224）

根据__call__方法，每个transform方法接收一个输入并返回一个输出，当前方法的输入是上一个方法的输出。

class Compose:
    """Chain several transforms into a single callable.

    Args:
        transforms: iterable of callables; each one receives the output of
            the previous one.
    """

    def __init__(self, transforms):
        # API-usage logging is skipped while scripting or tracing.
        if not (torch.jit.is_scripting() or torch.jit.is_tracing()):
            _log_api_usage_once(self)
        self.transforms = transforms

    def __call__(self, img):
        """Feed ``img`` through every transform in order and return the result."""
        result = img
        for step in self.transforms:
            result = step(result)
        return result

    def __repr__(self) -> str:
        """Return the class name followed by one indented line per transform."""
        body = "".join(f"\n    {step}" for step in self.transforms)
        return f"{self.__class__.__name__}({body}\n)"

一、裁剪

transforms.CenterCrop()

class CenterCrop(torch.nn.Module):
    """Crop an image around its centre to a fixed size.

    Args:
        size: desired output size; normalised by ``_setup_size``.
    """

    def __init__(self, size):
        super().__init__()
        _log_api_usage_once(self)
        size_error = "Please provide only two dimensions (h, w) for size."
        self.size = _setup_size(size, error_msg=size_error)

    def forward(self, img):
        """Return the centre crop of ``img``.

        Args:
            img (PIL Image or Tensor): Image to be cropped.

        Returns:
            PIL Image or Tensor: Cropped image.
        """
        cropped = F.center_crop(img, self.size)
        return cropped

    def __repr__(self) -> str:
        """Return ``ClassName(size=...)``."""
        cls_name = self.__class__.__name__
        return f"{cls_name}(size={self.size})"

从图像中心裁剪图像

size：剪切后的大小，比原始尺寸小时直接剪切

比原始尺寸大时进行填充

transforms.RandomCrop()

class RandomCrop(torch.nn.Module):
    """Cut a fixed-size patch out of the image at a random position.

    A tensor input is expected to be shaped [..., H, W] with any number of
    leading dimensions; with non-constant padding at most 2 leading
    dimensions are allowed.

    Args:
        size (sequence or int): Output size of the crop. An int (or a
            sequence of length 1) yields a square ``(size, size)`` crop;
            otherwise it is read as ``(h, w)``.
        padding (int or sequence, optional): Padding added to the image
            borders before cropping. Default is None. A single int pads all
            four borders; a sequence of length 2 gives the left/right and
            top/bottom padding; a sequence of length 4 gives the left, top,
            right and bottom padding.

            .. note::
                In torchscript mode a single int is not supported; pass a
                sequence of length 1 instead: ``[padding, ]``.
        pad_if_needed (boolean): Pad the image when it is smaller than the
            requested size so that no exception is raised. Cropping happens
            after padding, so the padding appears at a random offset.
        fill (number or tuple): Fill value for constant padding. Default is
            0. A tuple of length 3 fills the R, G, B channels respectively.
            Only used when ``padding_mode`` is constant. Tensors accept
            only a number; PIL Images accept only an int or a tuple.
        padding_mode (str): One of constant, edge, reflect or symmetric.
            Default is constant.

            - constant: pad with the constant value given by ``fill``.

            - edge: pad with the last value at the edge of the image. For a
              5D tensor the last 3 dimensions are padded instead of the
              last 2.

            - reflect: mirror the image without repeating the edge value,
              e.g. padding [1, 2, 3, 4] by 2 on both sides gives
              [3, 2, 1, 2, 3, 4, 3, 2].

            - symmetric: mirror the image repeating the edge value, e.g.
              padding [1, 2, 3, 4] by 2 on both sides gives
              [2, 1, 1, 2, 3, 4, 4, 3].
    """

    @staticmethod
    def get_params(img: Tensor, output_size: Tuple[int, int]) -> Tuple[int, int, int, int]:
        """Pick the crop rectangle for a random crop.

        Args:
            img (PIL Image or Tensor): Image to be cropped.
            output_size (tuple): Expected output size of the crop.

        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop``.

        Raises:
            ValueError: if the requested crop is larger than the image.
        """
        _, img_h, img_w = F.get_dimensions(img)
        crop_h, crop_w = output_size

        if img_h < crop_h or img_w < crop_w:
            raise ValueError(f"Required crop size {(crop_h, crop_w)} is larger than input image size {(img_h, img_w)}")

        # Exact fit: the only possible crop is the whole image.
        if img_w == crop_w and img_h == crop_h:
            return 0, 0, img_h, img_w

        top = torch.randint(0, img_h - crop_h + 1, size=(1,)).item()
        left = torch.randint(0, img_w - crop_w + 1, size=(1,)).item()
        return top, left, crop_h, crop_w

    def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode="constant"):
        super().__init__()
        _log_api_usage_once(self)

        size_error = "Please provide only two dimensions (h, w) for size."
        self.size = tuple(_setup_size(size, error_msg=size_error))

        self.padding = padding
        self.pad_if_needed = pad_if_needed
        self.fill = fill
        self.padding_mode = padding_mode

    def forward(self, img):
        """Pad ``img`` as configured, then crop it at a random location.

        Args:
            img (PIL Image or Tensor): Image to be cropped.

        Returns:
            PIL Image or Tensor: Cropped image.
        """
        if self.padding is not None:
            img = F.pad(img, self.padding, self.fill, self.padding_mode)

        _, height, width = F.get_dimensions(img)
        if self.pad_if_needed:
            # Grow the width first, then the height, up to the target size.
            if width < self.size[1]:
                extra_w = [self.size[1] - width, 0]
                img = F.pad(img, extra_w, self.fill, self.padding_mode)
            if height < self.size[0]:
                extra_h = [0, self.size[0] - height]
                img = F.pad(img, extra_h, self.fill, self.padding_mode)

        top, left, crop_h, crop_w = self.get_params(img, self.size)

        return F.crop(img, top, left, crop_h, crop_w)

    def __repr__(self) -> str:
        """Return ``ClassName(size=..., padding=...)``."""
        cls_name = self.__class__.__name__
        return f"{cls_name}(size={self.size}, padding={self.padding})"

size：裁剪后的尺寸

padding：当padding=a时上下左右均填充a个像素

transforms.RandomCrop(224, padding=16)

当padding=(a,b)时，左右填充a个像素，上下填充b个像素

transforms.RandomCrop(224, padding=(16, 64))

当padding=（a,b,c,d）时左、上、右、下分别填充a、b、c、d个像素

padding_mode：填充模式，共4种：‘constant’、‘edge’、‘reflect’、‘symmetric’，
constant：单一像素，像素颜色由fill决定，

transforms.RandomCrop(224, padding=16, fill=(255, 0, 0))

edge：边缘像素，

transforms.RandomCrop(224, padding=64, padding_mode='edge'),

reflect：不包含边缘的镜像，比如[1,2,3]-->[3,2,1,2,3,2,1]

transforms.RandomCrop(224, padding=64, padding_mode='reflect'),

symmetric：包含边缘的镜像，比如[1,2,3]-->[3,2,1,1,2,3,3,2,1]

transforms.RandomCrop(224, padding=64, padding_mode='symmetric')

transforms.RandomResizedCrop()

class RandomResizedCrop(torch.nn.Module):
    """Crop a random region of the image and resize it to a fixed size.

    A tensor input is expected to be shaped [..., H, W] with any number of
    leading dimensions.

    The crop has a random area (H * W) and a random aspect ratio, and is
    then resized to ``size``. This is popularly used to train the
    Inception networks.

    Args:
        size (int or sequence): Expected output size for each edge. An int
            (or a sequence of length 1) yields a square ``(size, size)``
            output; otherwise it is read as ``(h, w)``.

            .. note::
                In torchscript mode a single int is not supported; pass a
                sequence of length 1 instead: ``[size, ]``.
        scale (tuple of float): Lower and upper bounds of the crop area
            before resizing, relative to the area of the original image.
        ratio (tuple of float): Lower and upper bounds of the crop's aspect
            ratio before resizing.
        interpolation (InterpolationMode): Interpolation enum defined by
            :class:`torchvision.transforms.InterpolationMode`. Default is
            ``InterpolationMode.BILINEAR``. For tensor input only
            ``InterpolationMode.NEAREST``,
            ``InterpolationMode.NEAREST_EXACT``,
            ``InterpolationMode.BILINEAR`` and
            ``InterpolationMode.BICUBIC`` are supported. The matching
            Pillow integer constants, e.g. ``PIL.Image.BILINEAR``, are
            accepted as well.
        antialias (bool, optional): Whether to apply antialiasing. It only
            affects **tensors** with bilinear or bicubic modes; PIL images
            are always antialiased in those modes, and for every other mode
            the parameter is ignored. Possible values are:

            - ``True``: antialias bilinear or bicubic modes; other modes
              are unaffected. This is probably what you want to use.
            - ``False``: never antialias tensors. PIL images are still
              antialiased in bilinear or bicubic modes because PIL does not
              support turning it off.
            - ``None``: same as ``False`` for tensors and ``True`` for PIL
              images. Kept for legacy reasons; avoid it unless you really
              know what you are doing.

            The current default is ``None`` **but will change to**
            ``True`` **in v0.17** so that the PIL and Tensor backends are
            consistent.
    """

    def __init__(
        self,
        size,
        scale=(0.08, 1.0),
        ratio=(3.0 / 4.0, 4.0 / 3.0),
        interpolation=InterpolationMode.BILINEAR,
        antialias: Optional[Union[str, bool]] = "warn",
    ):
        super().__init__()
        _log_api_usage_once(self)
        size_error = "Please provide only two dimensions (h, w) for size."
        self.size = _setup_size(size, error_msg=size_error)

        if not isinstance(scale, Sequence):
            raise TypeError("Scale should be a sequence")
        if not isinstance(ratio, Sequence):
            raise TypeError("Ratio should be a sequence")
        # Reversed bounds only trigger a warning; they are stored as given.
        bounds_reversed = (scale[0] > scale[1]) or (ratio[0] > ratio[1])
        if bounds_reversed:
            warnings.warn("Scale and ratio should be of kind (min, max)")

        # Integer interpolation constants are mapped to InterpolationMode.
        if isinstance(interpolation, int):
            interpolation = _interpolation_modes_from_int(interpolation)

        self.interpolation = interpolation
        self.antialias = antialias
        self.scale = scale
        self.ratio = ratio

    @staticmethod
    def get_params(img: Tensor, scale: List[float], ratio: List[float]) -> Tuple[int, int, int, int]:
        """Pick the crop rectangle for a random sized crop.

        Args:
            img (PIL Image or Tensor): Input image.
            scale (list): range of scale of the origin size cropped
            ratio (list): range of aspect ratio of the origin aspect ratio cropped

        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for a random
            sized crop.
        """
        _, height, width = F.get_dimensions(img)
        area = height * width

        # Aspect ratios are sampled uniformly in log space.
        log_ratio = torch.log(torch.tensor(ratio))
        # Try up to 10 random rectangles; keep the first that fits.
        for _ in range(10):
            area_fraction = torch.empty(1).uniform_(scale[0], scale[1]).item()
            target_area = area * area_fraction
            log_aspect = torch.empty(1).uniform_(log_ratio[0], log_ratio[1])
            aspect_ratio = torch.exp(log_aspect).item()

            crop_w = int(round(math.sqrt(target_area * aspect_ratio)))
            crop_h = int(round(math.sqrt(target_area / aspect_ratio)))

            fits = 0 < crop_w <= width and 0 < crop_h <= height
            if not fits:
                continue
            top = torch.randint(0, height - crop_h + 1, size=(1,)).item()
            left = torch.randint(0, width - crop_w + 1, size=(1,)).item()
            return top, left, crop_h, crop_w

        # Fallback to central crop
        in_ratio = float(width) / float(height)
        if in_ratio < min(ratio):
            crop_w = width
            crop_h = int(round(crop_w / min(ratio)))
        elif in_ratio > max(ratio):
            crop_h = height
            crop_w = int(round(crop_h * max(ratio)))
        else:  # whole image
            crop_w, crop_h = width, height
        top = (height - crop_h) // 2
        left = (width - crop_w) // 2
        return top, left, crop_h, crop_w

    def forward(self, img):
        """Crop a random region of ``img`` and resize it to ``self.size``.

        Args:
            img (PIL Image or Tensor): Image to be cropped and resized.

        Returns:
            PIL Image or Tensor: Randomly cropped and resized image.
        """
        top, left, crop_h, crop_w = self.get_params(img, self.scale, self.ratio)
        return F.resized_crop(
            img, top, left, crop_h, crop_w, self.size, self.interpolation, antialias=self.antialias
        )

    def __repr__(self) -> str:
        """Return the class name with size, scale, ratio, interpolation and antialias."""
        interpolate_str = self.interpolation.value
        scale_repr = tuple(round(s, 4) for s in self.scale)
        ratio_repr = tuple(round(r, 4) for r in self.ratio)
        return (
            f"{self.__class__.__name__}(size={self.size}"
            f", scale={scale_repr}"
            f", ratio={ratio_repr}"
            f", interpolation={interpolate_str}"
            f", antialias={self.antialias})"
        )

size：裁剪后图片大小

scale：随机裁剪面积的比例，默认（0.08，1）

ratio：随机长宽比，默认（3/4，4/3）

interpolation：插值方法

transforms.RandomResizedCrop(size=224, scale=(0.5, 0.5))

transforms.FiveCrop()

在图像的左上、右上、左下、右下四个角以及中心各裁剪出一张size大小的图像，共5张

transforms.TenCrop()

除FiveCrop的图像外，进一步对FiveCrop的图像进行翻转

size：剪切后的图像大小

vertical_flip：True（垂直翻转）/False（水平翻转）

二、旋转

transforms.RandomHorizontalFlip()

# p=1: the horizontal flip is always applied.
transforms.RandomHorizontalFlip(p=1)

p：概率，依照概率水平翻转图片

transforms.RandomVerticalFlip()

transforms.RandomVerticalFlip(p=1)

p：概率，依照概率垂直翻转图像

transforms.RandomRotation()

transforms.RandomRotation(30, center=(0, 0), expand=True)

degrees：旋转角度

center：旋转中心

resample：重采样方法（新版本torchvision中该参数已更名为interpolation）

expand：是否扩张图片

三、图像变换

transforms.Pad()

transforms.Pad(padding=(8, 16, 32, 64),  padding_mode='symmetric')

padding：（a,b,c,d）左上右下的填充像素

padding_mode：4种：‘constant’、‘edge’、‘reflect’、‘symmetric’

transforms.ColorJitter()

transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.3)

brightness：亮度，当输入为a时，在[max(0,1-a), 1+a]中随机选取亮度；当输入为(a,b)时，在[a,b]之间选取亮度

contrast：对比度，设置如亮度

saturation：饱和度，设置如亮度

hue：色相，当为a时，从[-a,a]之间选择，a在[0,0.5]之间；当为(a,b)时，在[a,b]之间选择，a<=b，且a、b在[-0.5,0.5]之间

transforms.RandomGrayscale()

transforms.RandomGrayscale(p=1)

p：概率，依据概率将图片转化为灰度图

transforms.RandomAffine()

def __init__(
    self,
    degrees,
    translate=None,
    scale=None,
    shear=None,
    interpolation=InterpolationMode.NEAREST,
    fill=0,
    center=None,
):

shear：斜切

transforms.RandomAffine(degrees=0, shear=90, fill=(255, 0, 0))

transforms.RandomErasing()

transforms.ToTensor(),
transforms.RandomErasing(p=1, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=(254/255, 0, 0))

self.p = p
self.scale = scale
self.ratio = ratio
self.value = value
self.inplace = inplace

p：执行遮挡的概率

scale：遮挡区域面积占整张图像面积的比例范围

ratio：遮挡区域长宽比

value：遮挡区域的颜色

四、transform操作

transforms.RandomChoice()

class RandomChoice(RandomTransforms):
    """Apply one transform picked at random from a list.

    ``p`` optionally supplies the selection weights. This transform does not
    support torchscript.
    """

    def __init__(self, transforms, p=None):
        super().__init__(transforms)
        weights_ok = p is None or isinstance(p, Sequence)
        if not weights_ok:
            raise TypeError("Argument p should be a sequence")
        self.p = p

    def __call__(self, *args):
        """Draw one transform (weighted by ``p``) and call it with ``args``."""
        (chosen,) = random.choices(self.transforms, weights=self.p)
        return chosen(*args)

    def __repr__(self) -> str:
        """Return the parent representation followed by ``(p=...)``."""
        base = super().__repr__()
        return f"{base}(p={self.p})"

transforms.RandomChoice([transforms.RandomVerticalFlip(p=1),transforms.RandomHorizontalFlip(p=1)])

在包含的transforms方法中任选一个

transforms.RandomApply()

class RandomApply(torch.nn.Module):
    """Run a list of transforms, as a group, with a given probability.

    .. note::
        To script this transform, pass a ``torch.nn.ModuleList`` rather than
        a list/tuple of transforms:

        >>> transforms = transforms.RandomApply(torch.nn.ModuleList([
        >>>     transforms.ColorJitter(),
        >>> ]), p=0.3)
        >>> scripted_transforms = torch.jit.script(transforms)

        Only scriptable transforms may be used, i.e. ones that work with
        ``torch.Tensor`` and need neither `lambda` functions nor
        ``PIL.Image``.

    Args:
        transforms (sequence or torch.nn.Module): list of transformations
        p (float): probability
    """

    def __init__(self, transforms, p=0.5):
        super().__init__()
        _log_api_usage_once(self)
        self.transforms = transforms
        self.p = p

    def forward(self, img):
        """Return ``img`` unchanged or passed through every transform in order."""
        # One random draw decides whether the whole group is skipped.
        skip = bool(self.p < torch.rand(1))
        if not skip:
            for step in self.transforms:
                img = step(img)
        return img

    def __repr__(self) -> str:
        """Return the class name, the probability and one line per transform."""
        lines = [f"\n    p={self.p}"]
        lines.extend(f"\n    {step}" for step in self.transforms)
        return self.__class__.__name__ + "(" + "".join(lines) + "\n)"

transforms.RandomApply([transforms.RandomAffine(degrees=0, shear=45, fill=(255, 0, 0)),
                        transforms.Grayscale(num_output_channels=3)], p=0.5)

p：依据概率执行一组transforms方法

transforms.RandomOrder()

transforms.RandomOrder([transforms.RandomRotation(15),
                        transforms.Pad(padding=32),
                        transforms.RandomAffine(degrees=0, translate=(0.01, 0.1), scale=(0.9, 1.1))])

对一组transforms方法打乱顺序执行

五、自定义transforms方法

1.继承类

2.在__init__中定义属性

3.在__call__中定义方法

class NAME(object):
    """note

    Skeleton for a custom transform: replace every ``xx``/``xxxx``
    placeholder with real names and logic.

    Args:
        xx: xxxxxx
        xx: xxxxxx
    """

    def __init__(self, xx_1 , xx_2 = default_value):
        assert xxxxxxx    # validate the arguments
        self.xx_1 = xx_1 
        self.xx_2 = xx_2 

    def __call__(self, img):
        """
        Args:
            xxxxx
        Returns:
            xxxx
        """
        xxxx
        return xxx

class AddPepperNoise(object):
    """Add salt-and-pepper noise to an image with a given probability.

    Args:
        snr (float): Signal Noise Rate - the probability that a pixel is
            left untouched. Must lie in [0, 1]. The remaining ``1 - snr``
            is split evenly between salt (255) and pepper (0).
        p (float): Probability of applying the noise on a given call.

    Raises:
        TypeError: if ``snr`` or ``p`` is not an int or float.
        ValueError: if ``snr`` is outside [0, 1].
    """

    def __init__(self, snr, p=0.9):
        # Explicit exceptions instead of ``assert`` so that validation is not
        # stripped under ``python -O``; integers such as 0 or 1 are accepted.
        for label, value in (("snr", snr), ("p", p)):
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise TypeError(f"{label} should be a number, got {type(value).__name__}")
        if not 0 <= snr <= 1:
            raise ValueError(f"snr should be in [0, 1], got {snr}")
        self.snr = snr
        self.p = p

    def __call__(self, img):
        """Return ``img`` with noise added, or ``img`` itself when skipped.

        Args:
            img (PIL Image): PIL Image
        Returns:
            PIL Image: the noisy image converted to RGB, or the input
            object unchanged when the noise is not applied.
        """
        if random.uniform(0, 1) < self.p:
            pixels = np.array(img)  # np.array copies, so ``img`` is not modified
            signal_pct = self.snr
            noise_pct = (1 - self.snr)
            # One draw per pixel position: 0 = keep, 1 = salt, 2 = pepper.
            # The mask is 2-D, so boolean indexing covers every channel of an
            # (H, W, C) array and also works on a 2-D grayscale array.
            mask = np.random.choice(
                (0, 1, 2),
                size=pixels.shape[:2],
                p=[signal_pct, noise_pct/2., noise_pct/2.],
            )
            pixels[mask == 1] = 255   # salt noise
            pixels[mask == 2] = 0     # pepper noise
            return Image.fromarray(pixels.astype('uint8')).convert('RGB')
        else:
            return img