Transforms的结构及用法
Transforms就像一个工具箱,通过使用transforms里面的类,可以对图片进行一些变换
不同类型的图片打开方式不同,PIL类型用Image.open()
,tensor类型用ToTensor()
,ndarray类型用cv2.imread()
常见的transforms
transforms.Compose()
Composes several transforms together.
Example:
>>> transforms.Compose([
>>> transforms.CenterCrop(10),
>>> transforms.ToTensor(),
>>> ])
Compose()的参数需要是一个列表。在Python中,列表写作[数据1,数据2,…];在Compose中,列表里的每个元素都需要是transforms类型的对象,所以得到Compose([transforms对象1,transforms对象2,…])
transforms.ToTensor()
Convert a PIL Image or numpy.ndarray to tensor.
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
# Log one image to TensorBoard after converting it from a PIL image to a tensor.
# The `with` block closes the writer even if loading or converting the image fails.
with SummaryWriter("logs") as writer:
    img = Image.open("data/train/ants_image/0013035.jpg")
    print(img)  # the printed repr shows that the image is a PIL image

    # ToTensor: create the converter object once, then call it on the image.
    trans_totensor = transforms.ToTensor()
    img_tensor = trans_totensor(img)  # PIL image -> PyTorch tensor
    writer.add_image("ToTensor", img_tensor)
用控制台打开tensorboard,即可看到图片
tensorboard --logdir=logs
transforms.Normalize()
Normalize a tensor image with mean and standard deviation.(用平均值和标准差对tensor类型的图像进行归一化)
Given mean: (mean[1],...,mean[n]) and std: (std[1],...,std[n]) for n channels, this transform will normalize each channel of the input torch.*Tensor,
i.e., output[channel] = (input[channel] - mean[channel]) / std[channel]
光标放在括号内,按住Ctrl+P,可以看各个类需要什么参数
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
# Log the original tensor image and a normalized copy of it to TensorBoard.
# The `with` block closes the writer even if one of the steps below fails.
with SummaryWriter("logs") as writer:
    img = Image.open("data/train/ants_image/0013035.jpg")
    print(img)  # the printed repr shows that the image is a PIL image

    # ToTensor: create the converter object once, then call it on the image.
    trans_totensor = transforms.ToTensor()
    img_tensor = trans_totensor(img)  # PIL image -> PyTorch tensor
    writer.add_image("ToTensor", img_tensor)

    # Normalize: output[channel] = (input[channel] - mean[channel]) / std[channel]
    print(img_tensor[0][0][0])  # first value of channel 0 before normalization
    # First list is the per-channel mean, second list is the per-channel std.
    trans_norm = transforms.Normalize([6, 3, 2], [9, 3, 5])
    img_norm = trans_norm(img_tensor)
    print(img_norm[0][0][0])  # the same value after normalization
    # Tag spelling fixed ("Nomalize" -> "Normalize") so the panel is labelled correctly.
    writer.add_image("Normalize", img_norm, 1)
transforms.Resize()
Resize the input image to the given size.
The image can be a PIL Image or a torch Tensor, in which case it is expected
to have […, H, W] shape, where … means an arbitrary number of leading dimensions
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
# Log the original, normalized and resized versions of one image to TensorBoard.
# A single `with` block owns the writer: it is closed exactly once, after the
# last image has been written, and also when an earlier step raises.
with SummaryWriter("logs") as writer:
    img = Image.open("data/train/ants_image/0013035.jpg")
    print(img)  # the printed repr shows that the image is a PIL image

    # ToTensor: create the converter object once, then call it on the image.
    trans_totensor = transforms.ToTensor()
    img_tensor = trans_totensor(img)  # PIL image -> PyTorch tensor
    writer.add_image("ToTensor", img_tensor)

    # Normalize: output[channel] = (input[channel] - mean[channel]) / std[channel]
    print(img_tensor[0][0][0])  # first value of channel 0 before normalization
    # First list is the per-channel mean, second list is the per-channel std.
    trans_norm = transforms.Normalize([6, 3, 2], [9, 3, 5])
    img_norm = trans_norm(img_tensor)
    print(img_norm[0][0][0])  # the same value after normalization
    # Tag spelling fixed ("Nomalize" -> "Normalize") so the panel is labelled correctly.
    writer.add_image("Normalize", img_norm, 1)

    # Resize
    print(img.size)  # prints (768, 512) for this image
    trans_resize = transforms.Resize((512, 512))
    img_resize = trans_resize(img)  # resize the PIL image to (512, 512)
    img_resize = trans_totensor(img_resize)  # resized PIL image -> tensor
    print(img_resize)
    writer.add_image("Resize", img_resize, 0)
打开tensorboard
transforms.RandomCrop
随机裁剪
Crop the given image at a random location.
The image can be a PIL Image or a Tensor, in which case it is expected
to have […, H, W] shape, where … means an arbitrary number of leading
dimensions
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
# Log the original, normalized, resized and randomly cropped versions of one
# image to TensorBoard. A single `with` block owns the writer: it is closed
# exactly once, after the last crop is written, and also when a step raises.
with SummaryWriter("logs") as writer:
    img = Image.open("data/train/ants_image/0013035.jpg")
    print(img)  # the printed repr shows that the image is a PIL image

    # ToTensor: create the converter object once, then call it on the image.
    trans_totensor = transforms.ToTensor()
    img_tensor = trans_totensor(img)  # PIL image -> PyTorch tensor
    writer.add_image("ToTensor", img_tensor)

    # Normalize: output[channel] = (input[channel] - mean[channel]) / std[channel]
    print(img_tensor[0][0][0])  # first value of channel 0 before normalization
    # First list is the per-channel mean, second list is the per-channel std.
    trans_norm = transforms.Normalize([6, 3, 2], [9, 3, 5])
    img_norm = trans_norm(img_tensor)
    print(img_norm[0][0][0])  # the same value after normalization
    # Tag spelling fixed ("Nomalize" -> "Normalize") so the panel is labelled correctly.
    writer.add_image("Normalize", img_norm, 1)

    # Resize
    print(img.size)  # prints (768, 512) for this image
    trans_resize = transforms.Resize((512, 512))
    img_resize = trans_resize(img)  # resize the PIL image to (512, 512)
    img_resize = trans_totensor(img_resize)  # resized PIL image -> tensor
    print(img_resize)
    writer.add_image("Resize", img_resize, 0)

    # RandomCrop: Compose chains the random crop and the tensor conversion, so
    # each call takes the PIL image and returns a cropped tensor.
    trans_random = transforms.RandomCrop(512)
    trans_compose = transforms.Compose([trans_random, trans_totensor])
    for i in range(10):
        img_crop = trans_compose(img)
        # Use the loop index as the step so all ten crops can be browsed.
        writer.add_image("RandomCrop", img_crop, i)
总结
- 关注输入输出类型
- 注重官方文档
- 关注方法需要什么参数类型