首先我们导入需要的库:
import os
import sys
import cv2
import json
from PIL import Image
from torch.utils.data import DataLoader, random_split
import torchvision.models as models
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from tqdm import tqdm
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
之后添加这段代码,debug的时候用:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
之后定义两个绘图函数,分别绘制每个 epoch 的损失曲线和准确率曲线:
def plot_loss(x, history):
    """Plot training and validation loss per epoch, save the figure and show it.

    Args:
        x: Sequence of epoch indices used for the x axis.
        history: Mapping holding 'train_loss' and 'val_loss' sequences, each
            with one value per entry of ``x``.

    Side effects:
        Writes ``./weight/loss.png`` (creating ``./weight`` if needed) and
        opens the figure with ``plt.show()``.
    """
    os.makedirs('./weight', exist_ok=True)
    # Start from a fresh figure: on non-blocking backends plt.show() does not
    # clear the current figure, so earlier curves would leak into this plot.
    plt.figure()
    plt.plot(x, history['val_loss'], label='val', marker='o')
    plt.plot(x, history['train_loss'], label='train', marker='o')
    plt.title('Loss per epoch')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend()
    plt.grid()
    plt.savefig('./weight/loss.png')
    plt.show()
def plot_acc(x, history):
    """Plot training and validation accuracy per epoch, save the figure and show it.

    Args:
        x: Sequence of epoch indices used for the x axis.
        history: Mapping holding 'train_acc' and 'val_acc' sequences, each
            with one value per entry of ``x``.

    Side effects:
        Writes ``./weight/acc.png`` (creating ``./weight`` if needed) and
        opens the figure with ``plt.show()``.
    """
    os.makedirs('./weight', exist_ok=True)
    # Start from a fresh figure: on non-blocking backends plt.show() does not
    # clear the current figure, so the loss curves would be drawn here too.
    plt.figure()
    plt.plot(x, history['train_acc'], label='train_acc', marker='x')
    plt.plot(x, history['val_acc'], label='val_acc', marker='x')
    plt.title('Acc per epoch')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend()
    plt.grid()
    plt.savefig('./weight/acc.png')
    plt.show()
再定义训练函数:
def train_and_val(epochs, model, train_loader, len_train, val_loader, len_val, criterion, optimizer, device):
    """Train ``model`` for ``epochs`` epochs, validating after every epoch.

    Args:
        epochs: Number of passes over ``train_loader``.
        model: ``nn.Module`` to optimise; it is moved to ``device``.
        train_loader: Iterable yielding ``(image, label)`` training batches.
        len_train: Number of training samples (loss/accuracy denominator).
        val_loader: Iterable yielding ``(image, label)`` validation batches.
        len_val: Number of validation samples (loss/accuracy denominator).
        criterion: Loss callable ``criterion(output, label)``. It is assumed
            to return the batch-mean loss (PyTorch's default
            ``reduction='mean'``).
        optimizer: Optimizer already bound to ``model``'s parameters.
        device: ``torch.device`` the model and batches are placed on.

    Returns:
        dict with per-epoch lists under the keys 'train_loss', 'val_loss',
        'train_acc' and 'val_acc'.

    Side effects:
        Pickles the whole model to ``./weight/last.pth`` after every epoch
        and to ``./weight/best.pth`` whenever validation accuracy improves.
        The ``./weight`` directory must already exist.
    """
    torch.cuda.empty_cache()
    train_loss = []
    val_loss = []
    train_acc = []
    val_acc = []
    best_acc = 0
    model.to(device)
    fit_time = time.time()
    for e in range(epochs):
        since = time.time()
        running_loss = 0
        training_acc = 0
        # Switch to training mode once per epoch (validation below sets eval mode).
        model.train()
        with tqdm(total=len(train_loader)) as pbar:
            for image, label in train_loader:
                optimizer.zero_grad()
                image = image.to(device)
                label = label.to(device)
                # forward
                output = model(image)
                loss = criterion(output, label)
                predict_t = torch.max(output, dim=1)[1]
                # backward
                loss.backward()
                optimizer.step()  # update weights
                # loss is a batch mean; weight it by the batch size so that
                # dividing by the sample count yields a true per-sample mean.
                running_loss += loss.item() * label.size(0)
                training_acc += torch.eq(predict_t, label).sum().item()
                pbar.update(1)

        model.eval()
        val_losses = 0
        validation_acc = 0
        # validation loop
        with torch.no_grad():
            with tqdm(total=len(val_loader)) as pb:
                for image, label in val_loader:
                    image = image.to(device)
                    label = label.to(device)
                    output = model(image)
                    loss = criterion(output, label)
                    predict_v = torch.max(output, dim=1)[1]
                    val_losses += loss.item() * label.size(0)
                    validation_acc += torch.eq(predict_v, label).sum().item()
                    pb.update(1)

        # Per-sample means for this epoch.
        epoch_train_loss = running_loss / len_train
        epoch_val_loss = val_losses / len_val
        epoch_train_acc = training_acc / len_train
        epoch_val_acc = validation_acc / len_val
        train_loss.append(epoch_train_loss)
        val_loss.append(epoch_val_loss)
        train_acc.append(epoch_train_acc)
        val_acc.append(epoch_val_acc)

        torch.save(model, "./weight/last.pth")
        if best_acc < epoch_val_acc:
            # Remember the new best score; otherwise every epoch would
            # overwrite best.pth regardless of whether it improved.
            best_acc = epoch_val_acc
            torch.save(model, "./weight/best.pth")

        print("Epoch:{}/{}..".format(e + 1, epochs),
              "Train Acc: {:.3f}..".format(epoch_train_acc),
              "Val Acc: {:.3f}..".format(epoch_val_acc),
              "Train Loss: {:.3f}..".format(epoch_train_loss),
              "Val Loss: {:.3f}..".format(epoch_val_loss),
              "Time: {:.2f}s".format((time.time() - since)))

    history = {'train_loss': train_loss, 'val_loss': val_loss, 'train_acc': train_acc, 'val_acc': val_acc}
    print('Total time: {:.2f} m'.format((time.time() - fit_time) / 60))
    return history
接下来定义resnet网络,小编在这里建议大家可以使用预训练的网络,不用重新花费大量的时间去训练,如果使用的是预训练好的网络则可以不用管这段代码:
class BasicBlock(nn.Module):
    """Two-layer 3x3 residual block (the block type passed to ``ResNet`` by ``resnet34``).

    Args:
        in_channel: Number of input channels.
        out_channel: Number of output channels of both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input to produce the
            shortcut branch; ``None`` means the input itself is the shortcut.
        **kwargs: Accepted and ignored so the block can be constructed with
            the same keyword arguments as ``Bottleneck`` (``groups``,
            ``width_per_group``).
    """

    # Output channels = out_channel * expansion.
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += identity
        out = self.relu(out)
        return out
class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 bottleneck residual block (ResNet-50/101, ResNeXt).

    Note: in the original paper, on the main branch of the downsampling
    (dashed-line) residual block, the first 1x1 convolution has stride 2 and
    the 3x3 convolution has stride 1. The official PyTorch implementation
    instead uses stride 1 for the first 1x1 convolution and stride 2 for the
    3x3 convolution, which improves top-1 accuracy by roughly 0.5%.
    See ResNet v1.5:
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch

    Args:
        in_channel: Number of input channels.
        out_channel: Base channel count; the block outputs
            ``out_channel * expansion`` channels.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module producing the shortcut branch.
        groups: Number of groups of the 3x3 convolution.
        width_per_group: Width per group; the inner width is
            ``int(out_channel * (width_per_group / 64.)) * groups``.
    """

    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        super(Bottleneck, self).__init__()

        width = int(out_channel * (width_per_group / 64.)) * groups

        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=width,
                               kernel_size=1, stride=1, bias=False)  # squeeze channels
        self.bn1 = nn.BatchNorm2d(width)
        # -----------------------------------------
        self.conv2 = nn.Conv2d(in_channels=width, out_channels=width, groups=groups,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(width)
        # -----------------------------------------
        self.conv3 = nn.Conv2d(in_channels=width, out_channels=out_channel * self.expansion,
                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels
        self.bn3 = nn.BatchNorm2d(out_channel * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Apply the three conv/BN stages, add the shortcut and apply ReLU."""
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += identity
        out = self.relu(out)
        return out
class ResNet(nn.Module):
    """Generic ResNet/ResNeXt backbone assembled from a residual block class.

    Args:
        block: Residual block class. It must expose an ``expansion`` class
            attribute and accept ``(in_channel, out_channel, stride=...,
            downsample=..., groups=..., width_per_group=...)``.
        blocks_num: Sequence of four ints, the number of blocks in
            ``layer1`` .. ``layer4``.
        num_classes: Output size of the final linear layer.
        include_top: When True, append global average pooling and the
            linear classifier; when False, ``forward`` returns the
            ``layer4`` feature map.
        groups: Passed through to every block.
        width_per_group: Passed through to every block.
    """

    def __init__(self,
                 block,
                 blocks_num,
                 num_classes=10,
                 include_top=True,
                 groups=1,
                 width_per_group=64):
        super(ResNet, self).__init__()
        self.include_top = include_top
        # Running channel count; updated by _make_layer as stages are built.
        self.in_channel = 64

        self.groups = groups
        self.width_per_group = width_per_group

        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks with base width ``channel``.

        Only the first block receives ``stride`` and, when the shape changes,
        a 1x1 conv + BN ``downsample`` shortcut.
        """
        downsample = None
        if stride != 1 or self.in_channel != channel * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(channel * block.expansion))

        layers = []
        layers.append(block(self.in_channel,
                            channel,
                            downsample=downsample,
                            stride=stride,
                            groups=self.groups,
                            width_per_group=self.width_per_group))
        self.in_channel = channel * block.expansion

        for _ in range(1, block_num):
            layers.append(block(self.in_channel,
                                channel,
                                groups=self.groups,
                                width_per_group=self.width_per_group))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem and four stages; add pooling + classifier if ``include_top``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)

        return x
def resnet34(num_classes=10472, include_top=True):
    """Build a randomly initialised ResNet-34 (``BasicBlock``, stages [3, 4, 6, 3]).

    ImageNet weights for this layout:
    https://download.pytorch.org/models/resnet34-333f7ec4.pth
    """
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)
def resnet50(num_classes=10, include_top=True):
    """Build a randomly initialised ResNet-50 (``Bottleneck``, stages [3, 4, 6, 3])."""
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)
def resnet101(num_classes=10472, include_top=True):
    """Build a randomly initialised ResNet-101 (``Bottleneck``, stages [3, 4, 23, 3])."""
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)
def resnext50_32x4d(num_classes=10, include_top=True):
    """Build a randomly initialised ResNeXt-50 with 32 groups of width 4."""
    groups = 32
    width_per_group = 4
    return ResNet(Bottleneck, [3, 4, 6, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=groups,
                  width_per_group=width_per_group)
def resnext101_32x8d(num_classes=10, include_top=True):
    """Build a randomly initialised ResNeXt-101 with 32 groups of width 8."""
    groups = 32
    width_per_group = 8
    return ResNet(Bottleneck, [3, 4, 23, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=groups,
                  width_per_group=width_per_group)
接下来就是开始训练模型了:
if __name__ == '__main__':
    # Checkpoints and plots are written to ./weight; create it if missing.
    os.makedirs('./weight', exist_ok=True)

    # Raw string: in a normal literal "\P", "\T" and "\m" are invalid escape
    # sequences (a warning today, an error in future Python versions).
    path = r"D:\Python Data pack\Traffic_sign\myData"
    myList = os.listdir(path)
    noOfClasses = len(myList)
    print("Total Classes Detected:", noOfClasses)
    print("Importing Classes.....")
    # The class folders are expected to be named "0", "1", ..., N-1.
    # Only the files are counted here: decoding every image with cv2 would
    # hold the whole dataset in RAM although training reads the images
    # through datasets.ImageFolder.
    total_images = 0
    for count in range(noOfClasses):
        total_images += len(os.listdir(os.path.join(path, str(count))))
        print(count, end=" ")
    print(" ")
    print("Total Images Found:", total_images)
首先我们查看是否有weight这个文件夹,如果没有的话创建一个,之后存储训练好的模型。
接着我们读取每一个文件夹,然后将文件夹的个数打印出来(标签数)。
# NOTE: this snippet continues the body of the `if __name__ == '__main__':`
# block; indent it one level when assembling the full script.
# Select the compute device: the first CUDA GPU when available, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("Using CUDA device")
else:
    device = torch.device("cpu")
    print("Using CPU device")
# Number of samples per mini-batch for both data loaders.
BATCH_SIZE = 20
接着设置 batch size(BATCH_SIZE)为 20。
由于数据集里每个类别的图片较少,这里采用了较多的数据增强操作:
# NOTE: this snippet continues the body of the `if __name__ == '__main__':`
# block; indent it one level when assembling the full script.
# Each class has few images, so the training pipeline uses heavy augmentation.
# The validation pipeline is deterministic (resize + centre crop, the same
# preprocessing the prediction script uses) so that validation metrics are
# comparable between epochs.
# NOTE(review): horizontal/vertical flips can change the meaning of
# direction-dependent traffic signs - confirm they suit this dataset.
data_transform = {
    "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
                                 transforms.RandomRotation(degrees=30),
                                 transforms.RandomVerticalFlip(),
                                 transforms.RandomGrayscale(p=0.1),
                                 transforms.ToTensor(),
                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
    "val": transforms.Compose([transforms.Resize(256),
                               transforms.CenterCrop(224),
                               transforms.ToTensor(),
                               transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}
然后我们创建数据集;
# NOTE: this snippet continues the body of the `if __name__ == '__main__':`
# block; indent it one level when assembling the full script.
# Build the datasets. Adjust the path to your own data: images of the same
# class live in one sub-folder, one sub-folder per label.
# datasets.ImageFolder treats every sub-folder as a class and assigns label
# indices starting from 0 in sorted (string) order of the folder names, so
# it only fits the "one folder per class" layout.
data_root = "D:/Python Data pack/Traffic_sign/myData"
# Two views of the same folder, differing only in the transform, so the
# validation subset is not fed through the training augmentation.
full_train = datasets.ImageFolder(data_root, transform=data_transform["train"])
full_val = datasets.ImageFolder(data_root, transform=data_transform["val"])

# Random, disjoint 90% / 10% split by sample index.
train_size = int(len(full_train) * 0.9)
shuffled = torch.randperm(len(full_train)).tolist()
train_dataset = torch.utils.data.Subset(full_train, shuffled[:train_size])
val_dataset = torch.utils.data.Subset(full_val, shuffled[train_size:])

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                                           num_workers=2)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                                         num_workers=2)
最后设置好参数,开始训练:
# NOTE: this snippet continues the body of the `if __name__ == '__main__':`
# block; indent it one level when assembling the full script.
len_val = len(val_dataset)
len_train = len(train_dataset)

# Fine-tune an ImageNet-pretrained ResNet-101.
# NOTE: newer torchvision releases prefer the `weights=` argument;
# `pretrained=True` is kept for compatibility with older versions.
net = models.resnet101(pretrained=True)
# Number of labels = number of class folders found by ImageFolder (43 for
# the author's dataset); train_dataset is a Subset wrapping the ImageFolder.
num_classes = len(train_dataset.dataset.classes)
net.fc = nn.Linear(net.fc.in_features, num_classes)
# Cross-entropy loss expects integer class indices as targets.
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)
epoch = 60

history = train_and_val(epoch, net, train_loader, len_train, val_loader, len_val, loss_function, optimizer, device)
plot_loss(np.arange(0, epoch), history)
plot_acc(np.arange(0, epoch), history)
# Training is slow: roughly 7 minutes per epoch on the author's machine.
这里我的数据集标签共有43类,图片数量在40000张左右,训练60轮,使用的交叉熵损失函数;运行了5h。
然后这是我的训练阶段全部代码:
import os
import sys
import cv2
import json
from PIL import Image
from torch.utils.data import DataLoader, random_split
import torchvision.models as models
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from tqdm import tqdm
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
def plot_loss(x, history):
    """Plot training and validation loss per epoch, save the figure and show it.

    Args:
        x: Sequence of epoch indices used for the x axis.
        history: Mapping holding 'train_loss' and 'val_loss' sequences, each
            with one value per entry of ``x``.

    Side effects:
        Writes ``./weight/loss.png`` (creating ``./weight`` if needed) and
        opens the figure with ``plt.show()``.
    """
    os.makedirs('./weight', exist_ok=True)
    # Start from a fresh figure: on non-blocking backends plt.show() does not
    # clear the current figure, so earlier curves would leak into this plot.
    plt.figure()
    plt.plot(x, history['val_loss'], label='val', marker='o')
    plt.plot(x, history['train_loss'], label='train', marker='o')
    plt.title('Loss per epoch')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend()
    plt.grid()
    plt.savefig('./weight/loss.png')
    plt.show()
def plot_acc(x, history):
    """Plot training and validation accuracy per epoch, save the figure and show it.

    Args:
        x: Sequence of epoch indices used for the x axis.
        history: Mapping holding 'train_acc' and 'val_acc' sequences, each
            with one value per entry of ``x``.

    Side effects:
        Writes ``./weight/acc.png`` (creating ``./weight`` if needed) and
        opens the figure with ``plt.show()``.
    """
    os.makedirs('./weight', exist_ok=True)
    # Start from a fresh figure: on non-blocking backends plt.show() does not
    # clear the current figure, so the loss curves would be drawn here too.
    plt.figure()
    plt.plot(x, history['train_acc'], label='train_acc', marker='x')
    plt.plot(x, history['val_acc'], label='val_acc', marker='x')
    plt.title('Acc per epoch')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend()
    plt.grid()
    plt.savefig('./weight/acc.png')
    plt.show()
# 定义训练、验证函数
def train_and_val(epochs, model, train_loader, len_train, val_loader, len_val, criterion, optimizer, device):
    """Train ``model`` for ``epochs`` epochs, validating after every epoch.

    Args:
        epochs: Number of passes over ``train_loader``.
        model: ``nn.Module`` to optimise; it is moved to ``device``.
        train_loader: Iterable yielding ``(image, label)`` training batches.
        len_train: Number of training samples (loss/accuracy denominator).
        val_loader: Iterable yielding ``(image, label)`` validation batches.
        len_val: Number of validation samples (loss/accuracy denominator).
        criterion: Loss callable ``criterion(output, label)``. It is assumed
            to return the batch-mean loss (PyTorch's default
            ``reduction='mean'``).
        optimizer: Optimizer already bound to ``model``'s parameters.
        device: ``torch.device`` the model and batches are placed on.

    Returns:
        dict with per-epoch lists under the keys 'train_loss', 'val_loss',
        'train_acc' and 'val_acc'.

    Side effects:
        Pickles the whole model to ``./weight/last.pth`` after every epoch
        and to ``./weight/best.pth`` whenever validation accuracy improves.
        The ``./weight`` directory must already exist.
    """
    torch.cuda.empty_cache()
    train_loss = []
    val_loss = []
    train_acc = []
    val_acc = []
    best_acc = 0
    model.to(device)
    fit_time = time.time()
    for e in range(epochs):
        since = time.time()
        running_loss = 0
        training_acc = 0
        # Switch to training mode once per epoch (validation below sets eval mode).
        model.train()
        with tqdm(total=len(train_loader)) as pbar:
            for image, label in train_loader:
                optimizer.zero_grad()
                image = image.to(device)
                label = label.to(device)
                # forward
                output = model(image)
                loss = criterion(output, label)
                predict_t = torch.max(output, dim=1)[1]
                # backward
                loss.backward()
                optimizer.step()  # update weights
                # loss is a batch mean; weight it by the batch size so that
                # dividing by the sample count yields a true per-sample mean.
                running_loss += loss.item() * label.size(0)
                training_acc += torch.eq(predict_t, label).sum().item()
                pbar.update(1)

        model.eval()
        val_losses = 0
        validation_acc = 0
        # validation loop
        with torch.no_grad():
            with tqdm(total=len(val_loader)) as pb:
                for image, label in val_loader:
                    image = image.to(device)
                    label = label.to(device)
                    output = model(image)
                    loss = criterion(output, label)
                    predict_v = torch.max(output, dim=1)[1]
                    val_losses += loss.item() * label.size(0)
                    validation_acc += torch.eq(predict_v, label).sum().item()
                    pb.update(1)

        # Per-sample means for this epoch.
        epoch_train_loss = running_loss / len_train
        epoch_val_loss = val_losses / len_val
        epoch_train_acc = training_acc / len_train
        epoch_val_acc = validation_acc / len_val
        train_loss.append(epoch_train_loss)
        val_loss.append(epoch_val_loss)
        train_acc.append(epoch_train_acc)
        val_acc.append(epoch_val_acc)

        torch.save(model, "./weight/last.pth")
        if best_acc < epoch_val_acc:
            # Remember the new best score; otherwise every epoch would
            # overwrite best.pth regardless of whether it improved.
            best_acc = epoch_val_acc
            torch.save(model, "./weight/best.pth")

        print("Epoch:{}/{}..".format(e + 1, epochs),
              "Train Acc: {:.3f}..".format(epoch_train_acc),
              "Val Acc: {:.3f}..".format(epoch_val_acc),
              "Train Loss: {:.3f}..".format(epoch_train_loss),
              "Val Loss: {:.3f}..".format(epoch_val_loss),
              "Time: {:.2f}s".format((time.time() - since)))

    history = {'train_loss': train_loss, 'val_loss': val_loss, 'train_acc': train_acc, 'val_acc': val_acc}
    print('Total time: {:.2f} m'.format((time.time() - fit_time) / 60))
    return history
# 接下来定义Res-net函数,这里的模型是定义了完整的网络,没有预训练;若要预训练模型可直接下载,则无需看下面的代码:
class BasicBlock(nn.Module):
    """Two-layer 3x3 residual block (the block type passed to ``ResNet`` by ``resnet34``).

    Args:
        in_channel: Number of input channels.
        out_channel: Number of output channels of both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input to produce the
            shortcut branch; ``None`` means the input itself is the shortcut.
        **kwargs: Accepted and ignored so the block can be constructed with
            the same keyword arguments as ``Bottleneck`` (``groups``,
            ``width_per_group``).
    """

    # Output channels = out_channel * expansion.
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += identity
        out = self.relu(out)
        return out
class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 bottleneck residual block (ResNet-50/101, ResNeXt).

    Note: in the original paper, on the main branch of the downsampling
    (dashed-line) residual block, the first 1x1 convolution has stride 2 and
    the 3x3 convolution has stride 1. The official PyTorch implementation
    instead uses stride 1 for the first 1x1 convolution and stride 2 for the
    3x3 convolution, which improves top-1 accuracy by roughly 0.5%.
    See ResNet v1.5:
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch

    Args:
        in_channel: Number of input channels.
        out_channel: Base channel count; the block outputs
            ``out_channel * expansion`` channels.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module producing the shortcut branch.
        groups: Number of groups of the 3x3 convolution.
        width_per_group: Width per group; the inner width is
            ``int(out_channel * (width_per_group / 64.)) * groups``.
    """

    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        super(Bottleneck, self).__init__()

        width = int(out_channel * (width_per_group / 64.)) * groups

        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=width,
                               kernel_size=1, stride=1, bias=False)  # squeeze channels
        self.bn1 = nn.BatchNorm2d(width)
        # -----------------------------------------
        self.conv2 = nn.Conv2d(in_channels=width, out_channels=width, groups=groups,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(width)
        # -----------------------------------------
        self.conv3 = nn.Conv2d(in_channels=width, out_channels=out_channel * self.expansion,
                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels
        self.bn3 = nn.BatchNorm2d(out_channel * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Apply the three conv/BN stages, add the shortcut and apply ReLU."""
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += identity
        out = self.relu(out)
        return out
class ResNet(nn.Module):
    """Generic ResNet/ResNeXt backbone assembled from a residual block class.

    Args:
        block: Residual block class. It must expose an ``expansion`` class
            attribute and accept ``(in_channel, out_channel, stride=...,
            downsample=..., groups=..., width_per_group=...)``.
        blocks_num: Sequence of four ints, the number of blocks in
            ``layer1`` .. ``layer4``.
        num_classes: Output size of the final linear layer.
        include_top: When True, append global average pooling and the
            linear classifier; when False, ``forward`` returns the
            ``layer4`` feature map.
        groups: Passed through to every block.
        width_per_group: Passed through to every block.
    """

    def __init__(self,
                 block,
                 blocks_num,
                 num_classes=10,
                 include_top=True,
                 groups=1,
                 width_per_group=64):
        super(ResNet, self).__init__()
        self.include_top = include_top
        # Running channel count; updated by _make_layer as stages are built.
        self.in_channel = 64

        self.groups = groups
        self.width_per_group = width_per_group

        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks with base width ``channel``.

        Only the first block receives ``stride`` and, when the shape changes,
        a 1x1 conv + BN ``downsample`` shortcut.
        """
        downsample = None
        if stride != 1 or self.in_channel != channel * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(channel * block.expansion))

        layers = []
        layers.append(block(self.in_channel,
                            channel,
                            downsample=downsample,
                            stride=stride,
                            groups=self.groups,
                            width_per_group=self.width_per_group))
        self.in_channel = channel * block.expansion

        for _ in range(1, block_num):
            layers.append(block(self.in_channel,
                                channel,
                                groups=self.groups,
                                width_per_group=self.width_per_group))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem and four stages; add pooling + classifier if ``include_top``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)

        return x
def resnet34(num_classes=10472, include_top=True):
    """Build a randomly initialised ResNet-34 (``BasicBlock``, stages [3, 4, 6, 3]).

    ImageNet weights for this layout:
    https://download.pytorch.org/models/resnet34-333f7ec4.pth
    """
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)
def resnet50(num_classes=10, include_top=True):
    """Build a randomly initialised ResNet-50 (``Bottleneck``, stages [3, 4, 6, 3])."""
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)
def resnet101(num_classes=10472, include_top=True):
    """Build a randomly initialised ResNet-101 (``Bottleneck``, stages [3, 4, 23, 3])."""
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)
def resnext50_32x4d(num_classes=10, include_top=True):
    """Build a randomly initialised ResNeXt-50 with 32 groups of width 4."""
    groups = 32
    width_per_group = 4
    return ResNet(Bottleneck, [3, 4, 6, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=groups,
                  width_per_group=width_per_group)
def resnext101_32x8d(num_classes=10, include_top=True):
    """Build a randomly initialised ResNeXt-101 with 32 groups of width 8."""
    groups = 32
    width_per_group = 8
    return ResNet(Bottleneck, [3, 4, 23, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=groups,
                  width_per_group=width_per_group)
if __name__ == '__main__':
    # Checkpoints and plots are written to ./weight; create it if missing.
    os.makedirs('./weight', exist_ok=True)

    # Raw string: in a normal literal "\P", "\T" and "\m" are invalid escape
    # sequences (a warning today, an error in future Python versions).
    path = r"D:\Python Data pack\Traffic_sign\myData"
    myList = os.listdir(path)
    noOfClasses = len(myList)
    print("Total Classes Detected:", noOfClasses)
    print("Importing Classes.....")
    # The class folders are expected to be named "0", "1", ..., N-1.
    # Only the files are counted here: decoding every image with cv2 would
    # hold the whole dataset in RAM although training reads the images
    # through datasets.ImageFolder below.
    total_images = 0
    for count in range(noOfClasses):
        total_images += len(os.listdir(os.path.join(path, str(count))))
        print(count, end=" ")
    print(" ")
    print("Total Images Found:", total_images)

    # Select the compute device: the first CUDA GPU when available, else the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        print("Using CUDA device")
    else:
        device = torch.device("cpu")
        print("Using CPU device")
    BATCH_SIZE = 20

    # Each class has few images, so the training pipeline uses heavy
    # augmentation. The validation pipeline is deterministic (resize +
    # centre crop, the same preprocessing the prediction script uses) so
    # that validation metrics are comparable between epochs.
    # NOTE(review): horizontal/vertical flips can change the meaning of
    # direction-dependent traffic signs - confirm they suit this dataset.
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
                                     transforms.RandomRotation(degrees=30),
                                     transforms.RandomVerticalFlip(),
                                     transforms.RandomGrayscale(p=0.1),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # datasets.ImageFolder treats every sub-folder as a class and assigns
    # label indices starting from 0 in sorted (string) order of the folder
    # names, so it only fits the "one folder per class" layout.
    # Two views of the same folder, differing only in the transform, so the
    # validation subset is not fed through the training augmentation.
    full_train = datasets.ImageFolder(path, transform=data_transform["train"])
    full_val = datasets.ImageFolder(path, transform=data_transform["val"])

    # Random, disjoint 90% / 10% split by sample index.
    train_size = int(len(full_train) * 0.9)
    shuffled = torch.randperm(len(full_train)).tolist()
    train_dataset = torch.utils.data.Subset(full_train, shuffled[:train_size])
    val_dataset = torch.utils.data.Subset(full_val, shuffled[train_size:])

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                                               num_workers=2)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                                             num_workers=2)
    len_val = len(val_dataset)
    len_train = len(train_dataset)

    # Fine-tune an ImageNet-pretrained ResNet-101.
    # NOTE: newer torchvision releases prefer the `weights=` argument;
    # `pretrained=True` is kept for compatibility with older versions.
    net = models.resnet101(pretrained=True)
    # Number of labels = number of class folders found by ImageFolder
    # (43 for the author's dataset).
    num_classes = len(full_train.classes)
    net.fc = nn.Linear(net.fc.in_features, num_classes)
    # Cross-entropy loss expects integer class indices as targets.
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)
    epoch = 60

    history = train_and_val(epoch, net, train_loader, len_train, val_loader, len_val, loss_function, optimizer, device)
    plot_loss(np.arange(0, epoch), history)
    plot_acc(np.arange(0, epoch), history)
    # Training is slow: roughly 7 minutes per epoch on the author's machine.
训练好的文件存储在weight文件夹中,接着我们使用刚才训练好的模型去进行预测;
以下是预测代码:
import torch
from sklearn.metrics import accuracy_score
from torchvision import transforms, datasets
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns, pandas as pd
import matplotlib.pyplot as plt
if __name__ == '__main__':
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # The checkpoint is a pickled whole model. map_location lets a model saved
    # on a GPU be loaded on a CPU-only machine.
    # NOTE(security): torch.load unpickles arbitrary objects - only load
    # checkpoints you trust. Recent PyTorch releases default to
    # weights_only=True, which rejects whole-model pickles; pass
    # weights_only=False there if loading fails.
    model = torch.load("./weight/best.pth", map_location=device)
    model.to(device)
    # Inference mode: BatchNorm must use its running statistics.
    model.eval()

    BATCH_SIZE = 16
    data_transform = transforms.Compose([transforms.Resize(256),
                                         transforms.CenterCrop(224),
                                         transforms.ToTensor(),
                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    val_dataset = datasets.ImageFolder("./data/valid/", transform=data_transform)  # test data
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                                             num_workers=2)
    classes = val_dataset.classes
    # Size the per-class counters from the dataset instead of a fixed 10,
    # which raised IndexError for any label >= 10.
    num_classes = len(classes)
    class_correct = [0.] * num_classes
    class_total = [0.] * num_classes
    y_test, y_pred = [], []

    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images.to(device))
            _, predicted = torch.max(outputs, 1)
            predicted = predicted.cpu()
            # Keep this 1-D: squeezing would produce a 0-d tensor for a final
            # batch of size 1 and break the indexing below.
            c = (predicted == labels)
            for i, label in enumerate(labels):
                idx = int(label)
                class_correct[idx] += c[i].item()
                class_total[idx] += 1
            y_pred.extend(predicted.numpy())
            y_test.extend(labels.cpu().numpy())

    for i in range(num_classes):
        if class_total[i] == 0:
            # No test sample for this class; avoid dividing by zero.
            print(f"Acuracy of {classes[i]:5s}: n/a (no samples)")
            continue
        print(f"Acuracy of {classes[i]:5s}: {100 * class_correct[i] / class_total[i]:2.0f}%")

    # Pass the full label list so the matrix/report always have one row per
    # class, matching `classes`, even when a class is absent from the data.
    all_labels = list(range(num_classes))
    ac = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred, labels=all_labels)
    cr = classification_report(y_test, y_pred, labels=all_labels, target_names=classes)
    print("Accuracy is :", ac)
    print(cr)

    # Annotate only non-zero cells to keep the heatmap readable.
    labels = pd.DataFrame(cm).applymap(lambda v: f"{v}" if v != 0 else f"")
    plt.figure(figsize=(7, 5))
    sns.heatmap(cm, annot=labels, fmt='s', xticklabels=classes, yticklabels=classes, linewidths=0.1)
    plt.show()
到此就结束了,谢谢您的观看。