使用Transformer的编码器实现MNIST手写数字识别
1.数据预处理
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets,transforms
from torch.utils.data import DataLoader
# Preprocessing shared by the training and test splits: convert to a tensor,
# then standardise the single grey channel. (0.1307, 0.3081) are the widely
# used MNIST mean/std constants.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Only the training split requests a download; both splits use the same root
# directory.
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform)

# Small shuffled batches for training, large ordered batches for evaluation.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)
2. 定义Transformer模型
class TransformerModel(nn.Module):
    """Transformer-encoder classifier that reads an image as a sequence of rows.

    Each image row is one token: an input of shape
    ``(batch, 1, seq_len, input_dim)`` (or ``(batch, seq_len, input_dim)``) is
    treated as ``seq_len`` tokens of width ``input_dim``. A learned positional
    encoding is added, the sequence is passed through a stack of encoder
    layers, the token outputs are averaged, and a linear layer produces the
    class logits.

    Args:
        input_dim: Width of one token (pixels per image row); also used as the
            encoder's ``d_model``. Must be divisible by ``n_heads``.
        num_classes: Number of output logits.
        n_heads: Attention heads per encoder layer.
        num_encoder_layers: Number of stacked encoder layers.
        seq_len: Number of tokens per sample (image rows). Defaults to 28.
    """

    def __init__(self, input_dim, num_classes, n_heads=4, num_encoder_layers=3, seq_len=28):
        super(TransformerModel, self).__init__()
        self.input_dim = input_dim
        self.seq_len = seq_len
        # Learned positional encoding, broadcast over the batch dimension.
        self.positional_encoding = nn.Parameter(torch.zeros(1, seq_len, input_dim))
        # NOTE: ``self.transformer`` is not used in ``forward`` (the encoder
        # stack below works on its own copies of the layer). The attribute is
        # kept so that the ``state_dict`` keys stay identical to checkpoints
        # saved with earlier versions of this class.
        encoder_layers = self.transformer = nn.TransformerEncoderLayer(d_model=input_dim, nhead=n_heads)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_encoder_layers)
        self.fc = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``.

        The input tensor is left untouched.
        """
        # (batch, 1, seq_len, input_dim) -> (batch, seq_len, input_dim)
        x = x.reshape(x.size(0), self.seq_len, self.input_dim)
        # Out-of-place add: ``x`` may still share storage with the caller's
        # tensor, and an in-place ``+=`` would silently modify the input batch.
        x = x + self.positional_encoding
        # The encoder is built with the default layout:
        # (sequence_length, batch_size, embedding_dim).
        x = x.permute(1, 0, 2)
        x = self.transformer_encoder(x)  # (seq_len, batch, input_dim)
        x = x.mean(dim=0)  # average over tokens -> (batch, input_dim)
        x = self.fc(x)
        return x  # (batch, num_classes)
# Instantiate the classifier, its loss function and the optimiser that
# updates every model parameter.
model = TransformerModel(num_classes=10, input_dim=28)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
3. 训练
def train(model, train_loader, criterion, optimizer, epoch):
    """Run one pass over ``train_loader``, updating ``model`` after each batch.

    Args:
        model: Module to optimise; switched to training mode first.
        train_loader: Iterable of ``(data, target)`` batches that also exposes
            ``len()`` and a ``.dataset`` attribute (used for progress output).
        criterion: Callable mapping ``(output, target)`` to a scalar loss.
        optimizer: Optimiser bound to the model's parameters.
        epoch: Epoch number, used only in the progress message.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Standard step: clear stale gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()

        # Progress is reported only on every 100th batch.
        if batch_idx % 100:
            continue
        progress_line = (
            f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)}'
            f' ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}'
        )
        print(progress_line)
# Train for ten epochs, numbered 1 through 10 in the progress output.
for epoch in range(1, 11):
    train(
        model=model,
        train_loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        epoch=epoch,
    )
训练结果:
4.测试
#torch.save(model.state_dict(), './model/model_minist.pth')
# model.load_state_dict(torch.load('model.pth'))
def test(model, test_loader, criterion):
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy.

    Args:
        model: Module to evaluate; switched to evaluation mode first.
        test_loader: Iterable of ``(data, target)`` batches that exposes a
            ``.dataset`` attribute whose length is the number of samples.
        criterion: Loss callable returning a scalar per batch. Criteria with
            ``reduction='sum'`` and the default ``reduction='mean'`` are both
            handled so that the printed value is a true per-sample average.
    """
    model.eval()
    # A 'mean' criterion returns the batch average, so it has to be weighted
    # by the batch size before dividing by the dataset size; otherwise the
    # reported loss is too small by roughly a factor of the batch size.
    # A 'sum' criterion already returns the batch total.
    returns_batch_total = getattr(criterion, 'reduction', 'mean') == 'sum'
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            batch_loss = criterion(output, target).item()
            if returns_batch_total:
                test_loss += batch_loss
            else:
                test_loss += batch_loss * len(target)
            # Index of the largest logit is the predicted class.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)}'
          f' ({100. * correct / len(test_loader.dataset):.0f}%)\n')


# The function name matches pytest's default test-discovery pattern; opt out
# so that importing this module under pytest does not try to run it as a test.
test.__test__ = False
# Evaluate the trained model on the held-out test split.
test(model=model, test_loader=test_loader, criterion=criterion)
5.测试单张图片的可视化
from PIL import Image
import matplotlib.pyplot as plt
# Rebuild the network with the same hyper-parameters used for training, then
# restore the saved weights and switch to evaluation mode for inference.
checkpoint_path = 'C:\\Users\\xxxxx\\Desktop\\code\\transformer\\tran_minisit\\pythonProject\\model\\model_minist.pth'
model = TransformerModel(input_dim=28, num_classes=10)
state_dict = torch.load(checkpoint_path)
model.load_state_dict(state_dict)
model.eval()
# Load the picture to classify and force it to a single grey channel.
image_path = "08.jpg"
image = Image.open(image_path).convert('L')
input_size = 28
transform = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    # Use the same normalisation constants as the training pipeline
    # (0.1307, 0.3081). Normalising with (0.5, 0.5) here would feed the model
    # inputs on a different scale from the one it was trained on.
    transforms.Normalize((0.1307,), (0.3081,))
])
image_tensor = transform(image).unsqueeze(0)  # add the batch dimension
# 3. Run inference. Gradients are not needed, so the forward pass is wrapped
# in no_grad; the result can then be converted to NumPy directly.
with torch.no_grad():
    output = model(image_tensor)

# Drop the batch dimension to get one value per class.
output_logits = output.squeeze().numpy()
probabilities = torch.nn.functional.softmax(output, dim=1)
output_probs = probabilities.squeeze().numpy()

# Print the raw model outputs (logits) and the softmax probabilities.
print("模型原始输出(logits):", output_logits)
print("预测概率:", output_probs)
# 4. Visualise the input image next to the model's outputs.
plt.figure(figsize=(14, 6))

# Panel 1: the greyscale input image.
plt.subplot(1, 3, 1)
plt.title("Input Image")
plt.imshow(image, cmap='gray')

# Panels 2 and 3: one bar per class, first for the raw logits and then for
# the softmax probabilities.
bar_panels = [
    (2, "Model Logits", output_logits, 'Logit Value'),
    (3, "Prediction Probabilities", output_probs, 'Probability'),
]
for panel_index, panel_title, values, y_label in bar_panels:
    plt.subplot(1, 3, panel_index)
    plt.title(panel_title)
    plt.bar(range(len(values)), values)
    plt.xlabel('Class')
    plt.ylabel(y_label)

plt.tight_layout()
plt.show()
结果: