B站UP:霹雳吧啦Wz
课程合集链接:1.1 卷积神经网络基础_哔哩哔哩_bilibili
代码参考B站UP:霹雳吧啦Wz的《深度学习-图像分类篇章》视频,代码根据个人编程习惯及本人自用数据集进行少量改动,欢迎大佬们批评指正。
1 网络结构
2 代码实现
2.1 模型搭建
# 迁移学习是通过把预训练参数迁移给我们自己搭建的相同命名的参数,所以要注意参数的命名和源码中命名保持一致
import torch
# resnet网络分成四个大的卷积模块,每个卷积模块中都包含了好几个残差结构,下一个卷积模块的长宽尺寸都是上一个卷积模块尺寸的一半
# 每个卷积模块中的残差结构都是通过实线跳连接的,而每个卷积模块之间都是通过虚线进行跳连接的
# 这个虚线的跳连接就是下面设置的downsample参数,也就是下采样操作,将长宽尺寸缩小一半
class BasicBlock(torch.nn.Module):
    """Residual block used by ResNet-18/34: two 3x3 convolutions plus a shortcut.

    The optional ``downsample`` module implements the dashed (projection)
    shortcut — a 1x1 conv plus BatchNorm — used between stages to halve the
    spatial size and/or match the channel depth of the main branch.
    """

    # Both convolutions in an 18/34-layer block use the same number of
    # filters, so the output channel multiplier is 1 (Bottleneck uses 4).
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # bias=False because each conv is immediately followed by BatchNorm,
        # which makes an additive conv bias redundant.
        self.conv1 = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3,
                                     stride=stride, padding=1, bias=False)
        self.bn1 = torch.nn.BatchNorm2d(out_channels)
        self.conv2 = torch.nn.Conv2d(out_channels, out_channels, kernel_size=3,
                                     stride=1, padding=1, bias=False)
        self.relu = torch.nn.ReLU(inplace=True)
        # BN sits between each conv and its activation.
        self.bn2 = torch.nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        # Projection shortcut when the stage changes resolution/depth,
        # identity shortcut otherwise.
        shortcut = x if self.downsample is None else self.downsample(x)
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        y += shortcut
        return self.relu(y)
class Bottleneck(torch.nn.Module):
    """Residual block used by ResNet-50/101/152: 1x1 -> 3x3 -> 1x1 convs.

    The final 1x1 conv multiplies the channel count by ``expansion`` (4x),
    so the block squeezes the depth, does the 3x3 spatial work, then expands
    it again.  ``groups``/``width_per_group`` enable the ResNeXt-style
    grouped variant; with the defaults the middle width equals
    ``out_channel``.
    """

    # Third conv uses 4x as many filters as the first two.
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        super(Bottleneck, self).__init__()
        # Width of the middle 3x3 conv (== out_channel for plain ResNet).
        width = int(out_channel * (width_per_group / 64.)) * groups

        # 1x1: squeeze channels.
        self.conv1 = torch.nn.Conv2d(in_channels=in_channel, out_channels=width,
                                     kernel_size=1, stride=1, bias=False)
        self.bn1 = torch.nn.BatchNorm2d(width)
        # 3x3: spatial processing; carries the block's stride.
        self.conv2 = torch.nn.Conv2d(in_channels=width, out_channels=width,
                                     groups=groups, kernel_size=3,
                                     stride=stride, bias=False, padding=1)
        self.bn2 = torch.nn.BatchNorm2d(width)
        # 1x1: expand channels back out by `expansion`.
        self.conv3 = torch.nn.Conv2d(in_channels=width,
                                     out_channels=out_channel * self.expansion,
                                     kernel_size=1, stride=1, bias=False)
        self.bn3 = torch.nn.BatchNorm2d(out_channel * self.expansion)
        self.relu = torch.nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        # Projection shortcut between stages, identity shortcut otherwise.
        identity = x if self.downsample is None else self.downsample(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += identity
        return self.relu(out)
class ResNet(torch.nn.Module):
    """ResNet backbone assembled from a residual block class.

    Args:
        block: residual block class (BasicBlock or Bottleneck); its
            ``expansion`` attribute gives the channel multiplier of the
            block's output.
        blocks_num: number of residual blocks in each of the four stages,
            e.g. [3, 4, 6, 3] for ResNet-34/50 or [3, 4, 23, 3] for
            ResNet-101.
        num_classes: output size of the final fully connected layer.
        include_top: when False the pooling + fc head is omitted, so the
            backbone can be embedded in a larger network.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        super(ResNet, self).__init__()
        self.include_top = include_top
        self.in_channels = 64
        # Stem: 7x7/2 conv + BN + ReLU + 3x3/2 max-pool (4x downsampling).
        self.conv1 = torch.nn.Conv2d(3, self.in_channels, kernel_size=7,
                                     stride=2, padding=3, bias=False)
        self.bn1 = torch.nn.BatchNorm2d(self.in_channels)
        self.relu = torch.nn.ReLU(inplace=True)
        self.maxpool1 = torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four stages; stage 1 keeps the resolution, every later stage begins
        # with a stride-2 (projection-shortcut) block that halves H and W.
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)
        if self.include_top:
            # Global average pooling makes the head input-size agnostic.
            self.avgpool = torch.nn.AdaptiveAvgPool2d((1, 1))
            self.fc = torch.nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage: a first block (optionally with a projection
        shortcut) followed by ``block_num - 1`` identity-shortcut blocks."""
        # A projection shortcut is needed whenever the spatial size changes
        # (stride != 1) or the channel depth changes — for Bottleneck even
        # stage 1's first block expands 64 -> 256 at stride 1.
        downsample = None
        if stride != 1 or self.in_channels != channel * block.expansion:
            downsample = torch.nn.Sequential(
                torch.nn.Conv2d(self.in_channels, channel * block.expansion,
                                kernel_size=1, stride=stride, bias=False),
                torch.nn.BatchNorm2d(channel * block.expansion))
        layers = [block(self.in_channels, channel,
                        downsample=downsample, stride=stride)]
        # After the first block the running channel count includes expansion.
        self.in_channels = channel * block.expansion
        # Remaining blocks keep resolution and depth (stride 1, no shortcut
        # projection), hence start at index 1.
        layers.extend(block(self.in_channels, channel)
                      for _ in range(1, block_num))
        # *layers unpacks the list as positional arguments to Sequential.
        return torch.nn.Sequential(*layers)

    def forward(self, x):
        x = self.maxpool1(self.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)
        return x
def resnet34(num_classes=1000, include_top=True):
    """ResNet-34: BasicBlock with stage depths [3, 4, 6, 3]."""
    return ResNet(BasicBlock, [3, 4, 6, 3],
                  num_classes=num_classes, include_top=include_top)
def resnet50(num_classes=1000, include_top=True):
    """ResNet-50: Bottleneck with stage depths [3, 4, 6, 3]."""
    return ResNet(Bottleneck, [3, 4, 6, 3],
                  num_classes=num_classes, include_top=include_top)
def resnet101(num_classes=1000, include_top=True):
    """ResNet-101: Bottleneck with stage depths [3, 4, 23, 3]."""
    return ResNet(Bottleneck, [3, 4, 23, 3],
                  num_classes=num_classes, include_top=include_top)
2.2 模型训练
import torchvision.models.resnet # 按住ctrl鼠标点击resnet即可进入resnet的官方源码,在最前面有resnet各个层数的预训练权重文件下载地址
import os
import math
import sys
import torch
import torch.optim.lr_scheduler as lr_scheduler
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
from ResNet50_model_choice import resnet50
from utils import train_one_epoch, evaluate
# Fine-tune a pretrained ResNet-50 (transfer learning) on the SIPaKMeD set.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))

# Per-epoch checkpoints go here; exist_ok avoids the racy
# exists()-then-makedirs check.
save_dir = "dataset2_Res50_epoch50_Adam0.001_weights"
os.makedirs(save_dir, exist_ok=True)
tb_writer = SummaryWriter("dataset2_Res50_epoch50_Adam0.001_runs")

batch_size = 32
epochs = 50  # single constant shared by the LR schedule and the loop below

transform = {
    # Normalization stats are the ImageNet means/stds that the pretrained
    # weights were trained with.
    "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 transforms.Normalize([0.485, 0.456, 0.406],
                                                      [0.229, 0.224, 0.225])]),
    # Resize(256) scales the SHORT side to 256 while keeping the aspect
    # ratio; CenterCrop then takes the central 224x224 patch.
    "val": transforms.Compose([transforms.Resize(256),
                               transforms.CenterCrop(224),
                               transforms.ToTensor(),
                               transforms.Normalize([0.485, 0.456, 0.406],
                                                    [0.229, 0.224, 0.225])]),
}

# Dataset lives at ../dataset/SIPaKMeD relative to the working directory.
data_root = os.path.abspath(os.path.join(os.getcwd(), "../"))
data_path = os.path.join(data_root, "dataset", "SIPaKMeD")

# Training set.
train_dataset = datasets.ImageFolder(root=os.path.join(data_path, "train2"),
                                     transform=transform["train"])
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
# Validation set.
val_dataset = datasets.ImageFolder(root=os.path.join(data_path, "val2"),
                                   transform=transform["val"])
val_loader = DataLoader(dataset=val_dataset, shuffle=False, batch_size=batch_size)
print("{} images for training, {} images for Validation".format(len(train_dataset),
                                                                len(val_dataset)))

# Transfer learning: build the model with the default 1000-class head so
# every parameter name matches the pretrained checkpoint, then replace fc.
model = resnet50().to(device)
model_weight_path = "./resnet50-0676ba61.pth"  # pretrained-weights file
# strict=False loads every key whose name matches the model and leaves any
# unmatched layers at their default initialization instead of raising.
missing_keys, unexpected_keys = model.load_state_dict(
    torch.load(model_weight_path, map_location=device), strict=False)

# Swap the classification head for our 5 classes.
in_channel = model.fc.in_features
model.fc = torch.nn.Linear(in_channel, 5).to(device)
print(model.modules)

# NOTE(review): this criterion is unused here — train_one_epoch/evaluate
# appear to compute their own loss; kept to preserve script behavior.
loss_function = torch.nn.CrossEntropyLoss()

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=0.001, weight_decay=5E-5)
# Cosine decay from the initial lr down to 1% of it over `epochs` epochs.
lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - 0.01) + 0.01
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

for epoch in range(epochs):
    # Train.
    train_loss, train_acc = train_one_epoch(model=model,
                                            optimizer=optimizer,
                                            data_loader=train_loader,
                                            device=device,
                                            epoch=epoch)
    scheduler.step()
    # Validate.
    val_loss, val_acc = evaluate(model=model,
                                 data_loader=val_loader,
                                 device=device,
                                 epoch=epoch)
    tb_writer.add_scalars('loss/', {'train': train_loss, 'val': val_loss}, epoch)
    tb_writer.add_scalars('accuracy/', {'train': train_acc, 'val': val_acc}, epoch)
    tb_writer.add_scalar('learning_rate', optimizer.param_groups[0]["lr"], epoch)
    # Checkpoint every epoch as ...model-<epoch>.pth.
    torch.save(model.state_dict(),
               os.path.join(save_dir, "ResNet50_Adam0.001_model-{}.pth".format(epoch)))
2.3 模型预测
import os
import torch
from PIL import Image
from torchvision import transforms
from ResNet50_model_choice import resnet50
# Batch-evaluate the trained ResNet-50 on the held-out test images.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Same preprocessing as validation: short side -> 256, center-crop 224,
# ImageNet normalization (stats of the pretrained weights).
transform = transforms.Compose([transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),
                                transforms.Normalize([0.485, 0.456, 0.406],
                                                     [0.229, 0.224, 0.225])])

# Collect every .bmp image in the test directory.
data_root = os.path.abspath(os.path.join(os.getcwd(), "../"))
data_path = os.path.join(data_root, "dataset", "SIPaKMeD", "test3")
img_path_list = [os.path.join(data_path, name)
                 for name in os.listdir(data_path) if name.endswith(".bmp")]

# Build the 5-class model and load the trained weights.
model = resnet50(num_classes=5).to(device)
model.load_state_dict(torch.load("ResNet50_SGD0.001_model-49.pth", map_location=device))
classes = ["im_Dyskeratotic", "im_Koilocytotic", "im_Metaplastic", "im_Parabasal",
           "im_Superficial_Intermediate"]
model.eval()

true_num = 0
total = 0
with torch.no_grad():
    # Bug fix: the original walked the list in fixed-size batches of 8 and
    # silently dropped the trailing len(img_path_list) % 8 images from the
    # accuracy computation. Iterating the whole list scores every image
    # (each is still forwarded individually, exactly as before).
    for img_path in img_path_list:
        # Normalize expects 3 channels; convert guards against
        # grayscale/RGBA BMPs (a no-op for RGB inputs).
        img = Image.open(img_path).convert("RGB")
        img_name = os.path.basename(img_path)
        batch = torch.unsqueeze(transform(img), dim=0)  # add batch dim
        output = model(batch.to(device)).cpu()
        # argmax over raw logits equals argmax over softmax probabilities.
        predict_cla = torch.argmax(output).item()
        total += 1
        # Ground-truth class is the file name minus its 10-char suffix —
        # assumes that naming scheme for the test files; TODO confirm.
        if img_name[:-10] == classes[predict_cla]:
            true_num += 1

if total:
    print("The accuracy of prediction is {:.3f}%".format(100 * true_num / total))
else:
    print("No .bmp images found in {}".format(data_path))