Transforms (1)_transforms[i]-CSDN博客

本文链接：https://blog.csdn.net/A2000613/article/details/128515953

Transforms(1)

Transforms 是 PyTorch 中的一个工具包，封装了很多函数，用于对图像进行预处理

transforms在计算机视觉工具包torchvision下：

torchvision.transforms : 常用的图像预处理方法
torchvision.datasets : 常用数据集的dataset实现，MNIST，CIFAR-10，ImageNet等
torchvision.models : 常用的预训练模型，AlexNet，VGG，ResNet，GoogLeNet等

torchvision.transforms : 常用的图像预处理方法，提高泛化能力

数据中心化
数据标准化
缩放
裁剪
旋转
翻转
填充
噪声添加
灰度变换
线性变换
仿射变换
亮度、饱和度及对比度变换

1.相关方法

相关文章链接：

https://blog.csdn.net/weixin_38533896/article/details/86028509

transforms.ToTensor()

将 PIL Image 或者 numpy.ndarray类型数据转换为 tensor（张量）

from torchvision import transforms
from PIL import Image

if __name__ == '__main__':
    img_path = "/home/zxz/DEEPLEARNING/DEMO/TensorBoard_1/hymenoptera_data/train/ants/0013035.jpg"

    # Open the image through a context manager so the underlying file handle
    # is released deterministically instead of being left to the garbage
    # collector.
    with Image.open(img_path) as img:
        # img is a PIL image object
        print(type(img))
        print(img)

        # Create the ToTensor transform object
        tensor_trans = transforms.ToTensor()
        # Transform objects are callable: convert the PIL image to a tensor
        # while the image file is still open.
        tensor_img = tensor_trans(img)

    # tensor_img is a torch.Tensor
    print(type(tensor_img))
    print(tensor_img)

运行结果：

/home/zxz/anaconda3/envs/pytorch/bin/python /home/zxz/DEEPLEARNING/DEMO/TensorBoard_1/2.py
<class 'PIL.JpegImagePlugin.JpegImageFile'>
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=768x512 at 0x7F62D446E850>
<class 'torch.Tensor'>
tensor([[[0.3137, 0.3137, 0.3137,  ..., 0.3176, 0.3098, 0.2980],
         [0.3176, 0.3176, 0.3176,  ..., 0.3176, 0.3098, 0.2980],
         [0.3216, 0.3216, 0.3216,  ..., 0.3137, 0.3098, 0.3020],
         ...,
         [0.3412, 0.3412, 0.3373,  ..., 0.1725, 0.3725, 0.3529],
         [0.3412, 0.3412, 0.3373,  ..., 0.3294, 0.3529, 0.3294],
         [0.3412, 0.3412, 0.3373,  ..., 0.3098, 0.3059, 0.3294]],

        [[0.5922, 0.5922, 0.5922,  ..., 0.5961, 0.5882, 0.5765],
         [0.5961, 0.5961, 0.5961,  ..., 0.5961, 0.5882, 0.5765],
         [0.6000, 0.6000, 0.6000,  ..., 0.5922, 0.5882, 0.5804],
         ...,
         [0.6275, 0.6275, 0.6235,  ..., 0.3608, 0.6196, 0.6157],
         [0.6275, 0.6275, 0.6235,  ..., 0.5765, 0.6275, 0.5961],
         [0.6275, 0.6275, 0.6235,  ..., 0.6275, 0.6235, 0.6314]],

        [[0.9137, 0.9137, 0.9137,  ..., 0.9176, 0.9098, 0.8980],
         [0.9176, 0.9176, 0.9176,  ..., 0.9176, 0.9098, 0.8980],
         [0.9216, 0.9216, 0.9216,  ..., 0.9137, 0.9098, 0.9020],
         ...,
         [0.9294, 0.9294, 0.9255,  ..., 0.5529, 0.9216, 0.8941],
         [0.9294, 0.9294, 0.9255,  ..., 0.8863, 1.0000, 0.9137],
         [0.9294, 0.9294, 0.9255,  ..., 0.9490, 0.9804, 0.9137]]])

Process finished with exit code 0

代码1：

from torchvision import transforms
from PIL import Image
import cv2
from torch.utils.tensorboard import SummaryWriter

if __name__ == '__main__':
    img_path = "/home/zxz/DEEPLEARNING/DEMO/TensorBoard_1/hymenoptera_data/train/ants/0013035.jpg"

    # Both the TensorBoard writer and the image are context managers; using
    # `with` guarantees the event file is flushed/closed and the image file
    # handle is released even if the conversion below raises.
    with SummaryWriter("logs") as write, Image.open(img_path) as img:
        # Create the ToTensor transform object
        tensor_trans = transforms.ToTensor()
        # Convert the PIL image into a tensor
        tensor_img = tensor_trans(img)

        # Log the tensor image under the tag "test" at global step 1
        write.add_image("test", tensor_img, 1)

2.常见的transform

from torchvision import transforms

class Compose:
    """Composes several transforms together. This transform does not support torchscript.
    # 将 transforms 的多个操作组合到一起，然后按顺序整体执行
    
        Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
     
     
class ToTensor:
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. This transform does not support torchscript.
	# 将 PIL Image 以及 numpy.ndarray 数据类型转换为 tensor(张量)
    
    Converts a PIL Image or numpy.ndarray (H x W x C) in the range
    [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
    if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1)
    or if the numpy.ndarray has dtype = np.uint8
    
    
class Normalize(torch.nn.Module):
    """Normalize a tensor image with mean and standard deviation.
    This transform does not support PIL Image.
    Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
    channels, this transform will normalize each channel of the input
    ``torch.*Tensor`` i.e.,
    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``
	# 功能：对数据按通道进行标准化，即先减均值，再除以标准差，注意是 chw
    .. note::
        This transform acts out of place, i.e., it does not mutate the input tensor.

    Args:
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
        inplace(bool,optional): Bool to make this operation in-place.
        
        
class Resize(torch.nn.Module):
    """Resize the input image to the given size.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions

    Args:
        size (sequence or int): Desired output size. If size is a sequence like
            (h, w), output size will be matched to this. If size is an int,
            smaller edge of the image will be matched to this number.
            i.e, if height > width, then image will be rescaled to
            (size * height / width, size).
            In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
        interpolation (InterpolationMode): Desired interpolation enum defined by
            :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and
            ``InterpolationMode.BICUBIC`` are supported.
            For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.

    """
    
class Resize(torch.nn.Module):
    """Resize the input image to the given size.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
	# 功能：重置图像分辨率
    
    Args:
        size (sequence or int): Desired output size. If size is a sequence like
            (h, w), output size will be matched to this. If size is an int,
            smaller edge of the image will be matched to this number.
            i.e, if height > width, then image will be rescaled to
            (size * height / width, size).
            In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
        interpolation (InterpolationMode): Desired interpolation enum defined by
            :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and
            ``InterpolationMode.BICUBIC`` are supported.
            For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.

            size（sequence 或 int）：期望的输出尺寸。如果 size 是形如 (h, w) 的序列，
            输出尺寸将与之一致；如果 size 是 int，则图像的较短边会被缩放到该数值，
            即当 height > width 时，图像将被缩放为 (size * height / width, size)。
            在 torchscript 模式下不支持单个 int，需使用长度为 1 的序列：``[size, ]``。
            interpolation（InterpolationMode）：期望的插值方式，由
            :class:`torchvision.transforms.InterpolationMode` 枚举定义，默认值为
            ``InterpolationMode.BILINEAR``。如果输入是 Tensor，则仅支持
            ``InterpolationMode.NEAREST``、``InterpolationMode.BILINEAR`` 和
            ``InterpolationMode.BICUBIC``。为了向后兼容，仍然可以接受整数值
            （例如 ``PIL.Image.NEAREST``）。
    """
    
class RandomCrop(torch.nn.Module):
    """Crop the given image at a random location.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions,
    but if non-constant padding is used, the input is expected to have at most 2 leading dimensions
    # 功能：依据给定的size随机裁剪

实例：

from PIL import Image
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter

img_path = "/home/zxz/DEEPLEARNING/DEMO/TensorBoard_1/hymenoptera_data/train/ants/0013035.jpg"

# Context managers guarantee that the TensorBoard event file is flushed and
# closed, and the image file handle released, even if a transform raises.
with SummaryWriter("logs") as writer, Image.open(img_path) as img:
    print(img)  # <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=768x512 at 0x7F2680758390>

    # ToTensor: convert a PIL image into a tensor
    trans_totensor = transforms.ToTensor()  # instantiate the transform
    img_tensor = trans_totensor(img)        # invokes the transform's __call__
    writer.add_image("Totensor", img_tensor)

    # Normalize: per-channel standardization on a C x H x W tensor,
    # output[channel] = (input[channel] - mean[channel]) / std[channel]
    print(img_tensor[0][0][0])  # tensor(0.3137)
    # The image has three channels, so give one mean and one std per channel
    # (0.5 each here).
    trans_norm = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    img_norm = trans_norm(img_tensor)
    print(img_norm[0][0][0])  # tensor(-0.3725)

    # Resize: change the image resolution
    print(img.size)  # (768, 512)
    trans_resize = transforms.Resize((512, 512))  # explicit (h, w) target
    # img (PIL) -> Resize -> img_resize (PIL)
    img_resize = trans_resize(img)
    print(img_resize)  # <PIL.Image.Image image mode=RGB size=512x512 at 0x7F98675A4610>
    # img_resize (PIL) -> ToTensor -> img_resize (tensor)
    img_resize = trans_totensor(img_resize)
    writer.add_image("Resize", img_resize, 0)

    # Compose + Resize with a single int
    # With an int size, Resize matches the SHORTER edge to 512 and keeps the
    # aspect ratio (it does not force a 512x512 result). The shorter edge of
    # this 768x512 image is already 512.
    trans_resize_2 = transforms.Resize(512)
    # Compose applies the transforms in order: resize first, then to tensor
    trans_compose = transforms.Compose([trans_resize_2, trans_totensor])
    img_resize_2 = trans_compose(img)
    writer.add_image("Resize", img_resize_2, 1)

    # RandomCrop: crop a patch of the given size at a random location
    trans_random = transforms.RandomCrop(512)
    trans_compose_2 = transforms.Compose([trans_random, trans_totensor])
    # Log ten independent random crops as steps 0..9 of the same tag
    for i in range(10):
        img_crop = trans_compose_2(img)
        writer.add_image("RandomCrop", img_crop, i)

运行结果：

/home/zxz/anaconda3/envs/pytorch/bin/python /home/zxz/DEEPLEARNING/DEMO/TensorBoard_1/2.py
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=768x512 at 0x7F34361695D0>
tensor(0.3137)
tensor(-0.3725)
(768, 512)
<PIL.Image.Image image mode=RGB size=512x512 at 0x7F3435874750>

Process finished with exit code 0