PyTorch深度学习(3)Transforms CIFAR10

使用Transforms,需要先引入 from torchvision import transforms

Tensor  张量  实际就是一个多维数组(multidimensional array),是标量、向量、矩阵向更高维度的推广

__call__方法:

魔法函数__call__,即把类当作函数使用,不需要再调用类中的函数

例如:person = Person() 之后,person("name") 会自动调用 Person 类的 __call__ 方法;而普通方法则需要显式调用,如 person.hello("name")

# 创建具体的工具 tool = transforms.ToTensor()
# 使用工具  result = tool(input)  输出结果  调用Tensor __call__是把类当作函数使用
# PIL Image  numpy.ndarray --> tensor

# Tensor 张量  实际就是一个多维数组(multidimensional array),是标量、向量、矩阵向更高维度的推广

from PIL import Image
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter

# Demo: convert a PIL image to a torch tensor with transforms.ToTensor().
# Calling the ToTensor instance invokes its __call__ method, which in turn
# delegates to F.to_tensor(pic).

# Absolute path example: D:\PycharmProjects\learn_pytorch\train\ants\5650366_e22b7e1065.jpg
# (prefix the literal with r to avoid backslash escapes); a relative path also works.
image_path = "train/ants/5650366_e22b7e1065.jpg"
pil_image = Image.open(image_path)

# Build the tool once, then apply it like a function.
to_tensor = transforms.ToTensor()
tensor_image = to_tensor(pil_image)

# Log the converted image so it can be inspected in TensorBoard.
board_writer = SummaryWriter("logs")
board_writer.add_image("Tensor_image", tensor_image)
board_writer.close()

ToTensor():

将PIL Image  numpy.ndarray 转换为 tensor

创建具体的工具 tool = transforms.ToTensor()

使用工具  result = tool(input)

from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

# Load the source image. NOTE: `img`, `writer`, `trans_tensor` and
# `img_tensor` are reused by the fragments further down, so these
# names must stay stable.
img = Image.open("train/ants/20935278_9190345f6b.jpg")

writer = SummaryWriter("logs")

# ToTensor: accepts a PIL Image or numpy.ndarray and returns a tensor.
trans_tensor = transforms.ToTensor()
img_tensor = trans_tensor(img)
writer.add_image("ToTensor", img_tensor)

Normalize  归一化

transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])  参数:均值,标准差

# Normalize: per channel, output = (input - mean) / std.
print(img_tensor[0, 0, 0])  # channel 0, row 0, col 0 (0.8275 when mean/std are all 0.5)
normalize = transforms.Normalize([1, 3, 5], [3, 2, 1])  # per-channel means, per-channel stds
normalized_img = normalize(img_tensor)
print(normalized_img[0, 0, 0])  # -> 0.6549
writer.add_image("Normalize", normalized_img)

Resize  重置大小

输入为PIL Image

给定一个序列 (h, w) 时,缩放到指定的高和宽;给定一个整数 number 时,最小的边缩放到 number,另一边等比缩放

# Resize to an exact (h, w); input and output here are both PIL Images.
resize_512 = transforms.Resize((512, 512))
resized_pil = resize_512(img)               # PIL -> PIL
resized_tensor = trans_tensor(resized_pil)  # PIL -> tensor, so TensorBoard can log it
writer.add_image("Resize", resized_tensor, 0)

Compose  将多个参数功能整合

Compose()  中参数需要是一个列表,列表的数据表示形式为[数据1, 数据2, ...]

Compose()中,数据需要是transforms类型,即 Compose([transforms参数1, transforms参数2, ...])

# Compose takes a list [t1, t2, ...] of transforms and applies them in order;
# every element must itself be a transform.
# Resize with a single int scales so the image's *shorter* edge becomes that size.
pipeline = transforms.Compose([transforms.Resize(512), trans_tensor])  # resize first, then convert to tensor
writer.add_image("Resize2", pipeline(img), 1)

RandomCrop  随机裁剪

只会裁剪为指定(h, w) 宽高

# RandomCrop: cut a random patch; a single int gives a square (128 x 128) crop.
crop_pipeline = transforms.Compose([transforms.RandomCrop(128), trans_tensor])
for step in range(10):
    # Each call crops a different random region of the same image.
    writer.add_image("RandomCropHW", crop_pipeline(img), step)

writer.close()

Dataset 和 Transforms 联合使用

CIFAR10数据集中共有60000张彩色图像,图像32×32,分为10个类。50000张用于训练,10000张用于测试

10类分别为['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

数据集中数据类型为:(tensor, 类别标号)

# Dataset and transforms used together: download CIFAR-10 and have the
# dataset convert every PIL image to a tensor via its `transform` argument.
import torchvision
from torch.utils.tensorboard import SummaryWriter
import ssl

# SECURITY NOTE(review): disabling certificate verification works around
# download errors, but leaves the HTTPS download open to tampering;
# prefer fixing the local certificate store instead.
ssl._create_default_https_context = ssl._create_unverified_context

dataset_transformer = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])

# root: download/extract directory; train=True -> 50,000 training images,
# train=False -> 10,000 test images; transform is applied to each PIL image;
# download=True fetches the archive if it is not already present.
train_set = torchvision.datasets.CIFAR10(root="./dataset", train=True, transform=dataset_transformer, download=True)
test_set = torchvision.datasets.CIFAR10(root="./dataset", train=False, transform=dataset_transformer, download=True)

print(test_set[0])       # a (tensor, class-index) pair
print(test_set.classes)  # the 10 class names

img, target = test_set[0]
print(img)      # the image tensor
print(target)   # class index, e.g. 3
print(test_set.classes[target])  # e.g. "cat"
# BUG FIX: `img` is a torch.Tensor here (ToTensor was already applied by the
# dataset), and tensors have no .show(); convert back to PIL before displaying.
torchvision.transforms.ToPILImage()(img).show()

writer = SummaryWriter("logs")
for i in range(10):
    img, target = test_set[i]
    writer.add_image("Test_set", img, i)

writer.close()

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
可以使用Transformer模型来进行CIFAR-10图像分类,以下是一个使用PyTorch实现的示例代码: ```python import torch import torch.nn as nn import torch.optim as optim import torchvision import torchvision.transforms as transforms from torch.utils.data import DataLoader # 定义Transformer模型 class TransformerModel(nn.Module): def __init__(self, input_dim, hidden_dim, output_dim, num_heads, num_layers): super(TransformerModel, self).__init__() self.model_type = 'Transformer' self.pos_encoder = PositionalEncoding(input_dim, dropout=0.1) encoder_layers = nn.TransformerEncoderLayer(input_dim, num_heads, hidden_dim, dropout=0.1) self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers) self.fc = nn.Linear(input_dim, output_dim) def forward(self, x): x = self.pos_encoder(x) x = self.transformer_encoder(x) x = x.mean(dim=0) x = self.fc(x) return x # 定义位置编码器 class PositionalEncoding(nn.Module): def __init__(self, d_model, dropout=0.1, max_len=5000): super(PositionalEncoding, self).__init__() self.dropout = nn.Dropout(p=dropout) pe = torch.zeros(max_len, d_model) position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) pe[:, 0::2] = torch.sin(position * div_term) pe[:, 1::2] = torch.cos(position * div_term) pe = pe.unsqueeze(0).transpose(0, 1) self.register_buffer('pe', pe) def forward(self, x): x = x + self.pe[:x.size(0), :] return self.dropout(x) # 加载数据集 transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]) trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform) trainloader = DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2) testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform) testloader = DataLoader(testset, batch_size=64, shuffle=False, num_workers=2) # 定义超参数 input_dim = 3 * 32 * 32 hidden_dim = 256 output_dim = 10 num_heads = 8 num_layers = 4 
lr = 0.001 num_epochs = 10 # 初始化模型和优化器 model = TransformerModel(input_dim, hidden_dim, output_dim, num_heads, num_layers) optimizer = optim.Adam(model.parameters(), lr=lr) # 训练模型 for epoch in range(num_epochs): model.train() running_loss = 0.0 for i, data in enumerate(trainloader, 0): inputs, labels = data inputs = inputs.view(-1, input_dim) optimizer.zero_grad() outputs = model(inputs) loss = nn.CrossEntropyLoss()(outputs, labels) loss.backward() optimizer.step() running_loss += loss.item() if i % 100 == 99: print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100)) running_loss = 0.0 # 测试模型 model.eval() correct = 0 total = 0 with torch.no_grad(): for data in testloader: inputs, labels = data inputs = inputs.view(-1, input_dim) outputs = model(inputs) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() print('Epoch %d, Test accuracy: %d %%' % (epoch + 1, 100 * correct / total)) ``` 在这个示例中,我们使用了一个4层的Transformer模型来进行CIFAR-10图像分类,其中每层都包含8个注意力头和256个隐藏单元。我们使用Adam优化器来训练模型,学习率为0.001,迭代10个周期。在每个周期结束时,我们对模型进行测试,并输出测试准确率。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值