数据集为 ImageNet 的格式,分为训练(train)和验证(test)两个文件夹,训练文件夹下每个子文件夹的名字代表一个具体的类别。
——ImageNet
——train
——cls1
——cls1_00.jpg
——cls1_01.jpg
...
——cls1_59.jpg
——cls2
——clsn
——test
——cls1
——cls1_60.jpg
——cls1_61.jpg
...
——cls1_100.jpg
——cls2
——clsn
import os
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torch import nn
import torch.optim as optim
from torchvision import transforms, models
import torch
class VGGNet(nn.Module):
    """VGG16-based classifier with a compact fully connected head.

    Reuses torchvision's pretrained VGG16 convolutional backbone and
    replaces its stock 4096-wide classifier with a smaller head.

    Args:
        num_classes: number of output classes (default 2).
    """

    def __init__(self, num_classes=2):
        super(VGGNet, self).__init__()
        # Downloads the ImageNet weights on first use.
        net = models.vgg16(pretrained=True)
        # Drop the original classifier; keep only the conv features plus
        # VGG's built-in adaptive avg-pool, which yields 512x7x7 maps.
        net.classifier = nn.Sequential()
        self.features = net
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, 128),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        # Input is expected to be (N, 3, 224, 224) so the feature maps
        # flatten to 512*7*7 — this also fixes the ONNX export input size.
        # (The original printed x.shape on every call to discover this;
        # the unconditional debug print is removed here.)
        x = self.features(x)
        x = x.view(x.size(0), -1)  # flatten all but the batch dimension
        x = self.classifier(x)
        return x
import torch.nn as nn
# 定义LeNet网络结构
# class LeNet(nn.Module):
# def __init__(self, num_classes=2):
# super(LeNet, self).__init__()
# self.features = nn.Sequential(
# nn.Conv2d(3, 6, kernel_size=5),
# nn.Tanh(),
# nn.MaxPool2d(kernel_size=2),
# nn.Conv2d(6, 16, kernel_size=5),
# nn.Tanh(),
# nn.MaxPool2d(kernel_size=2)
# )
# self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
# self.classifier = nn.Sequential(
# nn.Linear(28*28, 32),
# nn.Tanh(),
# nn.Linear(32, 84),
# nn.Tanh(),
# nn.Linear(84, num_classes)
# )
#
# def forward(self, x):
# x = self.features(x)
# x = self.avgpool(x)
# # x = torch.flatten(x, 1)
# x = x.view(x.size(0), -1)
# x = self.classifier(x)
# return x
# Training hyper-parameters.
learning_rate = 0.001  # SGD learning rate (also hard-coded again at the optimizer below)
batch_size = 64  # NOTE(review): unused — the DataLoaders below hard-code batch_size=32; confirm which is intended
num_epochs = 100  # full passes over the training set
class CustomDataset(Dataset):
    """ImageNet-style folder dataset: ``data_dir/<class_name>/<image file>``.

    Each immediate subdirectory of ``data_dir`` is one class; every file
    inside it becomes a sample labeled with that class's index.

    Args:
        data_dir: root directory containing one subdirectory per class.
        transform: optional callable applied to each loaded PIL image.
    """

    def __init__(self, data_dir, transform=None):
        # Sort for a deterministic class -> index mapping across runs, and
        # keep only directories so a stray file in the root (e.g. a readme)
        # does not become a bogus class and crash the listdir below.
        self.classes = sorted(
            entry for entry in os.listdir(data_dir)
            if os.path.isdir(os.path.join(data_dir, entry))
        )
        self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}
        self.samples = []
        for cls_name in self.classes:
            cls_dir = os.path.join(data_dir, cls_name)
            # Sorted so sample order is reproducible across filesystems.
            for filename in sorted(os.listdir(cls_dir)):
                path = os.path.join(cls_dir, filename)
                self.samples.append((path, self.class_to_idx[cls_name]))
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, label = self.samples[idx]
        # Open via a context manager so the handle is closed promptly;
        # convert('RGB') forces the lazy load and normalizes grayscale /
        # palette images to 3 channels.
        with open(path, 'rb') as f:
            img = Image.open(f).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, label
# Dataset root (ImageNet-style layout: one subdirectory per class).
data_dir = 'C:\\mao\\code\\Tensorrt\\hymenoptera_data\\train'

# Standard ImageNet preprocessing: resize, center-crop to 224x224, then
# normalize with the ImageNet channel statistics expected by pretrained VGG16.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Datasets and loaders: training is shuffled, validation is not.
train_dataset = CustomDataset(data_dir, transform=transform)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_data_dir = 'C:\\mao\\code\\Tensorrt\\hymenoptera_data\\val'
val_dataset = CustomDataset(test_data_dir, transform=transform)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Model, loss, and optimizer.
criterion = nn.CrossEntropyLoss()
model = VGGNet()
if torch.cuda.is_available():
    model.cuda()
# Use the module-level learning_rate (same value as the previous
# hard-coded 0.001) so there is a single source of truth.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Print parameter shapes — useful to sanity-check the classifier head.
for name, parameters in model.named_parameters():
    print(name, ':', parameters.size())

for epoch in range(num_epochs):
    # --- training phase ---
    model.train()
    for batch_idx, (data, targets) in enumerate(train_dataloader):
        # Move the batch to the selected device (e.g. GPU).
        data, targets = data.to(device), targets.to(device)
        # Clear stale gradients before the backward pass (equivalent to the
        # original post-step zero_grad, but the conventional ordering).
        optimizer.zero_grad()
        output = model(data)
        # Reuse the single criterion instance instead of constructing a
        # new CrossEntropyLoss object on every batch.
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()
        # Report progress every 100 batches.
        if batch_idx % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, batch_idx+1, len(train_dataloader), loss.item()))

    # --- validation phase ---
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for data, targets in val_dataloader:
            data, targets = data.to(device), targets.to(device)
            output = model(data)
            # Predicted class = argmax over the logits.
            _, predicted = torch.max(output.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
        accuracy = 100 * correct / total
        print('Epoch [{}/{}], Validation Accuracy: {:.2f}%'
              .format(epoch+1, num_epochs, accuracy))

# Persist the trained weights for the ONNX export script.
torch.save(model.state_dict(), 'vgg.pt')
将训练好的权重导出为 ONNX。导出模型的输入大小由训练时 transforms.Compose 中的预处理决定(CenterCrop(224),即每张图为 3×224×224)。
import os
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torch import nn
import torch.optim as optim
from torchvision import transforms, models
import torch
class VGGNet(nn.Module):
    """VGG16 backbone (pretrained) topped with a compact FC classifier head.

    Must match the architecture used at training time so the saved
    state_dict loads cleanly.
    """

    def __init__(self, num_classes=2):
        super(VGGNet, self).__init__()
        backbone = models.vgg16(pretrained=True)
        # Strip the stock classifier; keep conv features + built-in avg-pool
        # (output is 512 x 7 x 7 for a 224 x 224 input).
        backbone.classifier = nn.Sequential()
        self.features = backbone
        head_layers = [
            nn.Linear(512 * 7 * 7, 512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, 128),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(128, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)  # flatten per sample
        return self.classifier(flat)
# Rebuild the network and load the weights produced by the training script.
# (The original also built an unused `vgg16 = models.vgg16(pretrained=False)`
# here; that dead construction is removed.)
model = VGGNet()
model.load_state_dict(torch.load('C:\\mao\\code\\python\\Classification\\VGG\\vgg.pt'))
# Switch to inference mode so Dropout is disabled in the exported graph.
model.eval()

# The dummy input fixes the ONNX input shape; it must match the training
# preprocessing (CenterCrop(224) -> 3x224x224). Batch size 32 is baked in,
# matching the original export.
dummy_input = torch.randn(32, 3, 224, 224)
torch.onnx.export(model, dummy_input, 'vgg16.onnx', verbose=True)