nnU-Net Code Walkthrough: Optimization Strategy

Optimization strategy

nnUNetTrainer
def initialize_optimizer_and_scheduler(self):
    assert self.network is not None, "self.initialize_network must be called first"
    self.optimizer = torch.optim.Adam(self.network.parameters(), self.initial_lr, weight_decay=self.weight_decay, amsgrad=True)
    self.lr_scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, mode='min', factor=0.2,
                                                       patience=self.lr_scheduler_patience,
                                                       verbose=True, threshold=self.lr_scheduler_eps, threshold_mode="abs")
  • self.initial_lr = 0.01
  • self.lr_scheduler_eps = 0.001
  • self.weight_decay = 3e-5

Once the learning rate has decayed to lr_scheduler_eps it stops decaying. In practice the learning rate only sits between 0.01 and 0.001 for the first two or three epochs; after that it stays at 0.001 for the rest of training.
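For reference, a plateau scheduler like this is stepped once per epoch with a training metric. Below is a minimal, self-contained sketch using a toy model (not nnU-Net code); the patience value of 30 stands in for self.lr_scheduler_patience, and nnU-Net, as far as I recall, passes a moving-average training loss to step():

import torch
from torch import nn
from torch.optim import lr_scheduler

# Toy model, purely to illustrate how ReduceLROnPlateau is driven.
model = nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=3e-5, amsgrad=True)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2,
                                           patience=30,       # stand-in for lr_scheduler_patience
                                           threshold=0.001,   # lr_scheduler_eps
                                           threshold_mode="abs")

for epoch in range(5):
    x, y = torch.randn(64, 10), torch.randn(64, 1)
    loss = nn.functional.mse_loss(model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # When the metric stops improving by `threshold` for `patience` epochs,
    # the learning rate is multiplied by `factor`.
    scheduler.step(loss.item())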

nnUNetTrainerV2
def initialize_optimizer_and_scheduler(self):
    assert self.network is not None, "self.initialize_network must be called first"
    self.optimizer = torch.optim.SGD(self.network.parameters(), self.initial_lr, weight_decay=self.weight_decay,
                                     momentum=0.99, nesterov=True)
    self.lr_scheduler = None

V2 drops the scheduler object and uses polynomial ("poly") learning-rate decay instead, updating the learning rate once per epoch:

def poly_lr(epoch, max_epochs, initial_lr, exponent=0.9):
    return initial_lr * (1 - epoch / max_epochs)**exponent


def maybe_update_lr(self, epoch=None):
    """
    if epoch is not None we overwrite epoch. Else we use epoch = self.epoch + 1

    (maybe_update_lr is called in on_epoch_end which is called before epoch is incremented.
    Therefore we need to do +1 here)

    :param epoch:
    :return:
    """
    if epoch is None:
        ep = self.epoch + 1
    else:
        ep = epoch
    self.optimizer.param_groups[0]['lr'] = poly_lr(ep, self.max_num_epochs, self.initial_lr, 0.9)
    self.print_to_log_file("lr:", np.round(self.optimizer.param_groups[0]['lr'], decimals=6))
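To get a feel for the curve, here is a quick throwaway check of poly_lr at a few epochs, assuming the usual defaults of initial_lr = 0.01 and 1000 training epochs:

# Approximate expected values with initial_lr=0.01, max_epochs=1000, exponent=0.9:
#   epoch 0   -> 0.01
#   epoch 500 -> ~0.0054  (0.01 * 0.5**0.9)
#   epoch 999 -> ~2e-5    (0.01 * 0.001**0.9)
for ep in (0, 500, 999):
    print(ep, poly_lr(ep, 1000, 0.01, 0.9))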
Custom cosine decay

Here is the learning-rate decay strategy I use most often:

import numpy as np


def cosine_scheduler(base_value, final_value, epochs, niter_per_ep, warmup_epochs=0, start_warmup_value=0.):
    # Linear warmup from start_warmup_value to base_value over warmup_epochs
    warmup_schedule = np.array([])
    warmup_iters = warmup_epochs * niter_per_ep
    if warmup_epochs > 0:
        warmup_schedule = np.linspace(start_warmup_value, base_value, warmup_iters)

    # Cosine decay from base_value to final_value over the remaining iterations
    iters = np.arange(epochs * niter_per_ep - warmup_iters)
    schedule = final_value + 0.5 * (base_value - final_value) * (1 + np.cos(np.pi * iters / len(iters)))

    schedule = np.concatenate((warmup_schedule, schedule))
    assert len(schedule) == epochs * niter_per_ep
    return schedule
  • Warm up from a very small learning rate to the base learning rate (base_value)
  • Update the learning rate at every iteration rather than every epoch

The code that applies the schedule during training is shown below for reference:

for it in range(self.num_batches_per_epoch):
    # global iteration index; here self.lr_scheduler is the precomputed schedule array
    it = self.num_batches_per_epoch * self.epoch + it
    param_group = self.optimizer.param_groups[0]
    param_group['lr'] = self.lr_scheduler[it]
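For completeness, the schedule itself is built once before training. A small sketch, using the same hyperparameters as the comparison below (100 epochs, 250 iterations per epoch; the concrete numbers are only illustrative):

lr_schedule = cosine_scheduler(base_value=0.01, final_value=0.001,
                               epochs=100, niter_per_ep=250,
                               warmup_epochs=10, start_warmup_value=5e-4)
print(len(lr_schedule))        # 100 * 250 = 25000 entries, one per iteration
print(lr_schedule[0])          # 5e-4, start of the linear warmup
print(lr_schedule[10 * 250])   # 0.01, end of warmup / start of cosine decay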

Comparison experiment

Assume an initial learning rate of 0.01 and 100 epochs for all three strategies. The figure below shows the three decay curves; note that the x-axis of the cosine-decay curve is in iterations (250 per epoch), not epochs.

(Figure: learning-rate curves for ReduceLROnPlateau, PolyScheduler, and CosineScheduler)

The experiment code is included below and can be run as-is:

import math
import numpy as np
import matplotlib.pyplot as plt


def schedule1(epochs, lr, factor, threshold):
    # Rough stand-in for ReduceLROnPlateau: decay by `factor` each epoch, floored at `threshold`
    lr_list = [max(lr * math.pow(factor, x), threshold) for x in range(epochs)]
    return lr_list


def schedule2(epochs, lr, exponent):
    # Poly decay as used by nnUNetTrainerV2
    lr_list = [lr * (1 - x / epochs) ** exponent for x in range(epochs)]
    return lr_list


def cosine_scheduler(base_value, final_value, epochs, niter_per_ep, warmup_epochs=0, start_warmup_value=0.):
    # Same function as above: linear warmup followed by cosine decay, one value per iteration
    warmup_schedule = np.array([])
    warmup_iters = warmup_epochs * niter_per_ep
    if warmup_epochs > 0:
        warmup_schedule = np.linspace(start_warmup_value, base_value, warmup_iters)

    iters = np.arange(epochs * niter_per_ep - warmup_iters)
    schedule = final_value + 0.5 * (base_value - final_value) * (1 + np.cos(np.pi * iters / len(iters)))

    schedule = np.concatenate((warmup_schedule, schedule))
    assert len(schedule) == epochs * niter_per_ep
    return schedule


max_epochs = 100
num_batches_per_epoch = 250
initial_lr = 0.01
factor = 0.2
lr_scheduler_eps = 0.001
exponent = 0.9
warmup_epochs = 10

lr_list1 = schedule1(max_epochs, initial_lr, factor, threshold=lr_scheduler_eps)
lr_list2 = schedule2(max_epochs, initial_lr, exponent)
lr_list3 = cosine_scheduler(initial_lr, lr_scheduler_eps, max_epochs, num_batches_per_epoch, warmup_epochs, start_warmup_value=5e-4)
plt.subplot(131), plt.plot(lr_list1), plt.title("ReduceLROnPlateau")
plt.subplot(132), plt.plot(lr_list2), plt.title("PolyScheduler")
plt.subplot(133), plt.plot(lr_list3), plt.title("CosineScheduler")
plt.show()

Writing this up takes time; if you found it useful, please consider leaving a like. More nnU-Net content will follow.

Finally, here is a simple UNet code example for image segmentation tasks:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.conv(x)


class UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=1, features=[64, 128, 256, 512]):
        super(UNet, self).__init__()
        self.ups = nn.ModuleList()
        self.downs = nn.ModuleList()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Down part of UNet
        for feature in features:
            self.downs.append(DoubleConv(in_channels, feature))
            in_channels = feature

        # Up part of UNet
        for feature in reversed(features):
            self.ups.append(nn.ConvTranspose2d(feature * 2, feature, kernel_size=2, stride=2))
            self.ups.append(DoubleConv(feature * 2, feature))

        self.bottleneck = DoubleConv(features[-1], features[-1] * 2)
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

    def forward(self, x):
        skip_connections = []

        for down in self.downs:
            x = down(x)
            skip_connections.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)
        skip_connections = skip_connections[::-1]

        for idx in range(0, len(self.ups), 2):
            x = self.ups[idx](x)
            skip_connection = skip_connections[idx // 2]

            if x.shape != skip_connection.shape:
                x = F.interpolate(x, size=skip_connection.shape[2:], mode='bilinear', align_corners=True)

            concat_skip = torch.cat((skip_connection, x), dim=1)
            x = self.ups[idx + 1](concat_skip)

        return self.final_conv(x)
```

This implements a simple UNet with double-convolution blocks and transposed-convolution upsampling for image segmentation. The input/output channel counts and the `features` list can be adjusted as needed for the task at hand.
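As a quick sanity check (my own snippet, not from the original code), the model can be instantiated and run on a dummy batch:

x = torch.randn(1, 3, 160, 160)   # dummy RGB image; spatial size divisible by 16 (4 pooling steps)
model = UNet(in_channels=3, out_channels=1)
print(model(x).shape)             # expected: torch.Size([1, 1, 160, 160])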