[pytorch] 3D UNet + ResNet as the Encoder

This post shows how to implement both 2D and 3D versions of UNet in PyTorch, and how to replace the original UNet encoder with a ResNet. By changing the stride of the first ResNet convolution and the expansion factor of the Bottleneck block, the ResNet stage outputs are made to match the UNet encoder, so the two networks can be combined. Example code is given for using different ResNet depths (ResNet-50, ResNet-101 and ResNet-152) as the encoder of the 3D UNet.


This post describes how to implement a 3D version of UNet and how to replace the encoder of the original UNet with a ResNet.
Original UNet implementation: U-Net (Convolutional Networks for Biomedical Image Segmentation)
ResNet implementation: [pytorch] 2D + 3D ResNet implementation, adapted here

It helps to be familiar with both architectures first; if you just want working code, jump straight to the complete UNet_3d_resnet_encoder implementation in Section 3.

1. Unet

from typing import Dict
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.autograd import Variable
import math
from functools import partial

1.1 Unet 2D version



class DoubleConv(nn.Sequential):
    def __init__(self, in_channels, out_channels, mid_channels=None):
        if mid_channels is None:
            mid_channels = out_channels
        super(DoubleConv, self).__init__(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )


class Down(nn.Sequential):
    def __init__(self, in_channels, out_channels):
        super(Down, self).__init__(
            nn.MaxPool2d(2, stride=2),
            DoubleConv(in_channels, out_channels)
        )


class Up(nn.Module):
    def __init__(self, in_channels, out_channels, bilinear=True):
        super(Up, self).__init__()
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
        x1 = self.up(x1)
        # [N, C, H, W]
        diff_y = x2.size()[2] - x1.size()[2]
        diff_x = x2.size()[3] - x1.size()[3]

        # padding_left, padding_right, padding_top, padding_bottom
        x1 = F.pad(x1, [diff_x // 2, diff_x - diff_x // 2,
                        diff_y // 2, diff_y - diff_y // 2])

        x = torch.cat([x2, x1], dim=1)
        x = self.conv(x)
        return x


class OutConv(nn.Sequential):
    def __init__(self, in_channels, num_classes):
        super(OutConv, self).__init__(
            nn.Conv2d(in_channels, num_classes, kernel_size=1)
        )


class UNet(nn.Module):
    def __init__(self,
                 in_channels: int = 1,
                 num_classes: int = 2,
                 bilinear: bool = True,
                 base_c: int = 64):
        super(UNet, self).__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.bilinear = bilinear

        self.in_conv = DoubleConv(in_channels, base_c)
        self.down1 = Down(base_c, base_c * 2)
        self.down2 = Down(base_c * 2, base_c * 4)
        self.down3 = Down(base_c * 4, base_c * 8)
        factor = 2 if bilinear else 1
        self.down4 = Down(base_c * 8, base_c * 16 // factor)
        self.up1 = Up(base_c * 16, base_c * 8 // factor, bilinear)
        self.up2 = Up(base_c * 8, base_c * 4 // factor, bilinear)
        self.up3 = Up(base_c * 4, base_c * 2 // factor, bilinear)
        self.up4 = Up(base_c * 2, base_c, bilinear)
        self.out_conv = OutConv(base_c, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x1 = self.in_conv(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.out_conv(x)

        return logits

Usage example

model = UNet(in_channels= 1,
             num_classes= 2,
             bilinear= True,
             base_c= 64)
x=torch.randn(1,1,224,224)
X=model(x)
print('model output shape =')
print(X.shape)

model output shape =
torch.Size([1, 2, 224, 224])
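As a small aside, here is a sketch (my own addition, not from the original post) of turning these logits into a per-pixel class prediction:

# Sketch (my own addition): convert the [N, num_classes, H, W] logits to a class map.
probs = torch.softmax(X, dim=1)    # per-pixel class probabilities
pred = probs.argmax(dim=1)         # [N, H, W] integer class labels
print(pred.shape)                  # torch.Size([1, 224, 224])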

1.2 Unet 3D version

All layers are replaced by their 3D counterparts (Conv2d → Conv3d, BatchNorm2d → BatchNorm3d, MaxPool2d → MaxPool3d, ConvTranspose2d → ConvTranspose3d), and the bilinear upsampling mode becomes trilinear. The skip-connection padding also has to account for the third spatial dimension; see the short sketch below on how F.pad orders its arguments.
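A minimal sketch (my own addition, not from the original post) of F.pad on 5-D tensors, which is what the Up_3d block relies on:

# Sketch (my own addition): F.pad pads from the last dimension backwards, so for a
# [N, C, D, H, W] tensor the pad list is (W_left, W_right, H_left, H_right, D_left, D_right).
t = torch.zeros(1, 1, 4, 8, 16)        # [N, C, D, H, W]
out = F.pad(t, [1, 1, 2, 2, 3, 3])     # W grows by 2, H by 4, D by 6
print(out.shape)                       # torch.Size([1, 1, 10, 12, 18])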

class DoubleConv_3d(nn.Sequential):
    def __init__(self, in_channels, out_channels, mid_channels=None):
        if mid_channels is None:
            mid_channels = out_channels
        super(DoubleConv_3d, self).__init__(
            nn.Conv3d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm3d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv3d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm3d(out_channels),
            nn.ReLU(inplace=True)
        )


class Down_3d(nn.Sequential):
    def __init__(self, in_channels, out_channels):
        super(Down_3d, self).__init__(
            nn.MaxPool3d(2, stride=2),
            DoubleConv_3d(in_channels, out_channels)
        )


class Up_3d(nn.Module):
    def __init__(self, in_channels, out_channels, bilinear=True):
        super(Up_3d, self).__init__()
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='trilinear', align_corners=True)
            self.conv = DoubleConv_3d(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose3d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv_3d(in_channels, out_channels)

    def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
        x1 = self.up(x1)
        # [N, C, D, H, W]
        diff_d = x2.size()[2] - x1.size()[2]
        diff_h = x2.size()[3] - x1.size()[3]
        diff_w = x2.size()[4] - x1.size()[4]

        # F.pad order: (W_left, W_right, H_left, H_right, D_left, D_right)
        x1 = F.pad(x1, [diff_w // 2, diff_w - diff_w // 2,
                        diff_h // 2, diff_h - diff_h // 2,
                        diff_d // 2, diff_d - diff_d // 2])

        x = torch.cat([x2, x1], dim=1)
        x = self.conv(x)
        return x


class OutConv_3d(nn.Sequential):
    def __init__(self, in_channels, num_classes):
        super(OutConv_3d, self).__init__(
            nn.Conv3d(in_channels, num_classes, kernel_size=1)
        )
        
class UNet_3d(nn.Module):
    def __init__(self,
                 in_channels: int = 1,
                 num_classes: int = 2,
                 bilinear: bool = True,
                 base_c: int = 64):
        super(UNet_3d, self).__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.bilinear = bilinear

        self.in_conv = DoubleConv_3d(in_channels, base_c)
        self.down1 = Down_3d(base_c, base_c * 2)
        self.down2 = Down_3d(base_c * 2, base_c * 4)
        self.down3 = Down_3d(base_c * 4, base_c * 8)
        factor = 2 if bilinear else 1
        self.down4 = Down_3d(base_c * 8, base_c * 16 // factor)
        self.up1 = Up_3d(base_c * 16, base_c * 8 // factor, bilinear)
        self.up2 = Up_3d(base_c * 8, base_c * 4 // factor, bilinear)
        self.up3 = Up_3d(base_c * 4, base_c * 2 // factor, bilinear)
        self.up4 = Up_3d(base_c * 2, base_c, bilinear)
        self.out_conv = OutConv_3d(base_c, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x1 = self.in_conv(x)
        print('x1=',x1.shape)
        x2 = self.down1(x1)
        print('x2=',x2.shape)
        x3 = self.down2(x2)
        print('x3=',x3.shape)
        x4 = self.down3(x3)
        print('x4=',x4.shape)
        x5 = self.down4(x4)
        print('x5=',x5.shape)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.out_conv(x)

        return logits

Example, first with bilinear=False:

model = UNet_3d(in_channels= 1,
             num_classes= 2,
             bilinear= False,
             base_c= 64)
x=torch.randn(1,1,64,64,64)
X=model(x)
print('model output shape =')
print(X.shape)


x1= torch.Size([1, 64, 64, 64, 64])
x2= torch.Size([1, 128, 32, 32, 32])
x3= torch.Size([1, 256, 16, 16, 16])
x4= torch.Size([1, 512, 8, 8, 8])
x5= torch.Size([1, 1024, 4, 4, 4])
model output shape =
torch.Size([1, 2, 64, 64, 64])

And with bilinear=True:

model = UNet_3d(in_channels= 1,
             num_classes= 2,
             bilinear= True,
             base_c= 64)
x=torch.randn(1,1,64,64,64)
X=model(x)
print('model output shape =')
print(X.shape)

x1= torch.Size([1, 64, 64, 64, 64])
x2= torch.Size([1, 128, 32, 32, 32])
x3= torch.Size([1, 256, 16, 16, 16])
x4= torch.Size([1, 512, 8, 8, 8])
x5= torch.Size([1, 512, 4, 4, 4])
model output shape =
torch.Size([1, 2, 64, 64, 64])

I recommend the bilinear=False version here, where upsampling is done with (learnable) transposed convolutions.
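As a rough illustration of the trade-off, here is a sketch (my own addition, not from the original post) comparing the parameter counts of the two variants; the trilinear path has no learnable upsampling weights, while the transposed-convolution path does:

# Sketch (my own addition): parameter counts of the two upsampling choices.
def count_params(m):
    return sum(p.numel() for p in m.parameters())

print('bilinear=True :', count_params(UNet_3d(bilinear=True)))   # trilinear interpolation upsampling
print('bilinear=False:', count_params(UNet_3d(bilinear=False)))  # transposed-convolution upsampling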

2. Resnet

A stock ResNet cannot simply be dropped in as the UNet encoder: its convolution settings differ, so each block produces feature maps of a different size than the corresponding UNet encoder stage. The goal is therefore to modify the ResNet so that the output of every stage matches the UNet encoder. Two changes are needed (the resulting shapes are verified by the example at the end of this section):

    1. Change the stride of the first convolution (conv1) from (2, 2, 2) to (1, 1, 1).
    2. Change the expansion factor of Bottleneck from 4 to 2.

With base_c = 64 this gives stage outputs of 64, 128, 256, 512 and 1024 channels at full, 1/2, 1/4, 1/8 and 1/16 resolution, exactly matching x1 ... x5 of the UNet_3d encoder above.

def conv3x3x3(in_planes, out_planes, stride=1, dilation=1):
    # 3x3x3 convolution with padding
    return nn.Conv3d(
        in_planes,
        out_planes,
        kernel_size=3,
        dilation=dilation,
        stride=stride,
        padding=dilation,
        bias=False)


def downsample_basic_block(x, planes, stride, no_cuda=False):
    # Type-'A' shortcut: spatially downsample with a strided average pool, then
    # zero-pad the extra channels (parameter-free shortcut).
    # no_cuda is kept for API compatibility; the zero padding now follows the
    # dtype/device of the input instead of the old Variable/.cuda() logic.
    out = F.avg_pool3d(x, kernel_size=1, stride=stride)
    zero_pads = torch.zeros(out.size(0), planes - out.size(1), out.size(2),
                            out.size(3), out.size(4),
                            dtype=out.dtype, device=out.device)
    out = torch.cat([out, zero_pads], dim=1)

    return out


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3x3(inplanes, planes, stride=stride, dilation=dilation)
        self.bn1 = nn.BatchNorm3d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3x3(planes, planes, dilation=dilation)
        self.bn2 = nn.BatchNorm3d(planes)
        self.downsample = downsample
        self.stride = stride
        self.dilation = dilation

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 2  # 4 -> 2

    def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv3d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm3d(planes)
        self.conv2 = nn.Conv3d(
            planes, planes, kernel_size=3, stride=stride, dilation=dilation, padding=dilation, bias=False)
        self.bn2 = nn.BatchNorm3d(planes)
        self.conv3 = nn.Conv3d(planes, planes * 2, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm3d(planes * 2)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.dilation = dilation

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class ResNet_3d(nn.Module):

    def __init__(self,
                 block,
                 layers,
                 num_classes=1000,
                 shortcut_type='B',
                 no_cuda = False,
                 include_top=True):
        super(ResNet_3d, self).__init__()
        self.inplanes = 64
        self.no_cuda = no_cuda
        self.include_top = include_top
        
        self.conv1 = nn.Conv3d(
            1,
            64,
            kernel_size=7,
            stride=(1, 1, 1), #stride=(2, 2, 2)-> (1, 1, 1)
            padding=(3, 3, 3),
            bias=False)
            
        self.bn1 = nn.BatchNorm3d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type)
        self.layer2 = self._make_layer(
            block, 128, layers[1], shortcut_type, stride=2)
        self.layer3 = self._make_layer(
            block, 256, layers[2], shortcut_type, stride=2)
        self.layer4 = self._make_layer(
            block, 512, layers[3], shortcut_type, stride=2)
        
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))  # adaptive average pooling to output size (1, 1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
            elif isinstance(m, nn.BatchNorm3d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
                


    def _make_layer(self, block, planes, blocks, shortcut_type, stride=1, dilation=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            if shortcut_type == 'A':
                downsample = partial(
                    downsample_basic_block,
                    planes=planes * block.expansion,
                    stride=stride,
                    no_cuda=self.no_cuda)
            else:
                downsample = nn.Sequential(
                    nn.Conv3d(
                        self.inplanes,
                        planes * block.expansion,
                        kernel_size=1,
                        stride=stride,
                        bias=False), nn.BatchNorm3d(planes * block.expansion))

        layers = []
        layers.append(block(self.inplanes, planes, stride=stride, downsample=downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)
    

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        print('x1=',x.shape)
        x = self.maxpool(x)
        x = self.layer1(x)
        print('x2=',x.shape)
        x = self.layer2(x)
        print('x3=',x.shape)
        x = self.layer3(x)
        print('x4=',x.shape)
        x = self.layer4(x)
        print('x5=',x.shape)

        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)

        return x


Example

resnet50_3d = ResNet_3d(Bottleneck, [3, 4, 6, 3],shortcut_type='B',no_cuda=False,num_classes=3,include_top=True)
x=torch.randn(1,1,64,64,64)
X=resnet50_3d(x)
print(X.shape)

x1= torch.Size([1, 64, 64, 64, 64])
x2= torch.Size([1, 128, 32, 32, 32])
x3= torch.Size([1, 256, 16, 16, 16])
x4= torch.Size([1, 512, 8, 8, 8])
x5= torch.Size([1, 1024, 4, 4, 4])
torch.Size([1, 3])
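These per-stage shapes match the UNet_3d encoder printout from Section 1.2 exactly, which is the whole point of the two modifications. If only the feature extractor is needed, the classification head can be dropped via include_top=False; a small sketch (my own addition, not from the original post):

# Sketch (my own addition): use the modified 3D ResNet as a pure encoder.
encoder = ResNet_3d(Bottleneck, [3, 4, 6, 3], shortcut_type='B', include_top=False)
feats = encoder(torch.randn(1, 1, 64, 64, 64))
print(feats.shape)  # torch.Size([1, 1024, 4, 4, 4])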

3. UNet_3d_resnet_encoder

The complete code is below and can be used as-is.

from typing import Dict
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.autograd import Variable
import math
from functools import partial


def conv3x3x3(in_planes, out_planes, stride=1, dilation=1):
    # 3x3x3 convolution with padding
    return nn.Conv3d(
        in_planes,
        out_planes,
        kernel_size=3,
        dilation=dilation,
        stride=stride,
        padding=dilation,
        bias=False)


def downsample_basic_block(x, planes, stride, no_cuda=False):
    # Type-'A' shortcut: spatially downsample with a strided average pool, then
    # zero-pad the extra channels (parameter-free shortcut).
    # no_cuda is kept for API compatibility; the zero padding now follows the
    # dtype/device of the input instead of the old Variable/.cuda() logic.
    out = F.avg_pool3d(x, kernel_size=1, stride=stride)
    zero_pads = torch.zeros(out.size(0), planes - out.size(1), out.size(2),
                            out.size(3), out.size(4),
                            dtype=out.dtype, device=out.device)
    out = torch.cat([out, zero_pads], dim=1)

    return out


class Bottleneck(nn.Module):
    expansion = 2  # 4 -> 2

    def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv3d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm3d(planes)
        self.conv2 = nn.Conv3d(
            planes, planes, kernel_size=3, stride=stride, dilation=dilation, padding=dilation, bias=False)
        self.bn2 = nn.BatchNorm3d(planes)
        self.conv3 = nn.Conv3d(planes, planes * 2, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm3d(planes * 2)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.dilation = dilation

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class DoubleConv_3d(nn.Sequential):
    def __init__(self, in_channels, out_channels, mid_channels=None):
        if mid_channels is None:
            mid_channels = out_channels
        super(DoubleConv_3d, self).__init__(
            nn.Conv3d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm3d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv3d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm3d(out_channels),
            nn.ReLU(inplace=True)
        )


class Down_3d(nn.Sequential):
    def __init__(self, in_channels, out_channels):
        super(Down_3d, self).__init__(
            nn.MaxPool3d(2, stride=2),
            DoubleConv_3d(in_channels, out_channels)
        )


class Up_3d(nn.Module):
    def __init__(self, in_channels, out_channels, bilinear=True):
        super(Up_3d, self).__init__()
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='trilinear', align_corners=True)
            self.conv = DoubleConv_3d(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose3d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv_3d(in_channels, out_channels)

    def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
        x1 = self.up(x1)
        # [N, C, D, H, W]
        diff_d = x2.size()[2] - x1.size()[2]
        diff_h = x2.size()[3] - x1.size()[3]
        diff_w = x2.size()[4] - x1.size()[4]

        # F.pad order: (W_left, W_right, H_left, H_right, D_left, D_right)
        x1 = F.pad(x1, [diff_w // 2, diff_w - diff_w // 2,
                        diff_h // 2, diff_h - diff_h // 2,
                        diff_d // 2, diff_d - diff_d // 2])

        x = torch.cat([x2, x1], dim=1)
        x = self.conv(x)
        return x


class OutConv_3d(nn.Sequential):
    def __init__(self, in_channels, num_classes):
        super(OutConv_3d, self).__init__(
            nn.Conv3d(in_channels, num_classes, kernel_size=1)
        )
    
    

class UNet_3d_resnet_encoder(nn.Module):
    def __init__(self,
                 block,
                 layers,
                 in_channels: int = 1,
                 num_classes: int = 2,
                 bilinear: bool = True,
                 base_c: int = 64,
                 shortcut_type='B',
                 no_cuda = False):
        super(UNet_3d_resnet_encoder, self).__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.bilinear = bilinear
        
        self.inplanes = 64
        self.no_cuda = no_cuda

        #self.in_conv = DoubleConv_3d(in_channels, base_c)
        #self.down1 = Down_3d(base_c, base_c * 2)
        #self.down2 = Down_3d(base_c * 2, base_c * 4)
        #self.down3 = Down_3d(base_c * 4, base_c * 8)
        # unet
        factor = 2 if bilinear else 1
        self.down4 = Down_3d(base_c * 8, base_c * 16 // factor)
        self.up1 = Up_3d(base_c * 16, base_c * 8 // factor, bilinear)
        self.up2 = Up_3d(base_c * 8, base_c * 4 // factor, bilinear)
        self.up3 = Up_3d(base_c * 4, base_c * 2 // factor, bilinear)
        self.up4 = Up_3d(base_c * 2, base_c, bilinear)
        self.out_conv = OutConv_3d(base_c, num_classes)
        
        # resnet
        
        self.conv1 = nn.Conv3d(
            in_channels,
            64,
            kernel_size=7,
            stride=(1, 1, 1), #stride=(2, 2, 2)-> (1, 1, 1)
            padding=(3, 3, 3),
            bias=False)
            
        self.bn1 = nn.BatchNorm3d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type)
        self.layer2 = self._make_layer(
            block, 128, layers[1], shortcut_type, stride=2)
        self.layer3 = self._make_layer(
            block, 256, layers[2], shortcut_type, stride=2)
        self.layer4 = self._make_layer(
            block, 512, layers[3], shortcut_type, stride=2)


        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
            elif isinstance(m, nn.BatchNorm3d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
                


    def _make_layer(self, block, planes, blocks, shortcut_type, stride=1, dilation=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            if shortcut_type == 'A':
                downsample = partial(
                    downsample_basic_block,
                    planes=planes * block.expansion,
                    stride=stride,
                    no_cuda=self.no_cuda)
            else:
                downsample = nn.Sequential(
                    nn.Conv3d(
                        self.inplanes,
                        planes * block.expansion,
                        kernel_size=1,
                        stride=stride,
                        bias=False), nn.BatchNorm3d(planes * block.expansion))

        layers = []
        layers.append(block(self.inplanes, planes, stride=stride, downsample=downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.conv1(x)
        x = self.bn1(x)
        x1 = self.relu(x)
        #print('x1=',x1.shape)
        x2 = self.maxpool(x1)
        x2 = self.layer1(x2)
        #print('x2=',x2.shape)
        x3 = self.layer2(x2)
        #print('x3=',x3.shape)
        x4 = self.layer3(x3)
        #print('x4=',x4.shape)
        x5 = self.layer4(x4)
        #print('x5=',x5.shape)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.out_conv(x)

        return logits

Example 1: ResNet-50 as the encoder, with input size (1, 64, 64, 64)

model = UNet_3d_resnet_encoder(block = Bottleneck, # only Bottleneck (ResNet-50 and deeper) is supported, not ResNet-18/34
             layers = [3, 4, 6, 3],
             shortcut_type='B',
             no_cuda=False,
             in_channels= 1,
             num_classes= 2,
             bilinear= False, # bilinear=True is not supported here
             base_c= 64) 
x=torch.randn(1,1,64,64,64)
X=model(x)
print('model output shape =')
print(X.shape)

model output shape =
torch.Size([1, 2, 64, 64, 64])

And with input size (3, 128, 128, 64):

model = UNet_3d_resnet_encoder(block = Bottleneck, # only Bottleneck (ResNet-50 and deeper) is supported, not ResNet-18/34
             layers = [3, 4, 6, 3],
             shortcut_type='B',
             no_cuda=False,
             in_channels= 3,
             num_classes= 2,
             bilinear= False, # bilinear=True is not supported here
             base_c= 64) 
x=torch.randn(1,3,128,128,64)
X=model(x)
print('model output shape =')
print(X.shape)
model output shape =
torch.Size([1, 2, 128, 128, 64])

Example 2: ResNet-101 as the encoder

model = UNet_3d_resnet_encoder(block = Bottleneck, # only Bottleneck (ResNet-50 and deeper) is supported, not ResNet-18/34
             layers = [3, 4, 23, 3],
             shortcut_type='B',
             no_cuda=False,
             in_channels= 1,
             num_classes= 2,
             bilinear= False, # bilinear=True is not supported here
             base_c= 64) 
x=torch.randn(1,1,64,64,64)
X=model(x)
print('model output shape =')
print(X.shape)

model output shape =
torch.Size([1, 2, 64, 64, 64])

Example 3: ResNet-152 as the encoder

model = UNet_3d_resnet_encoder(block = Bottleneck, # only Bottleneck (ResNet-50 and deeper) is supported, not ResNet-18/34
             layers = [3, 8, 36, 3],
             shortcut_type='B',
             no_cuda=False,
             in_channels= 1,
             num_classes= 2,
             bilinear= False, # bilinear=True is not supported here
             base_c= 64) 
x=torch.randn(1,1,64,64,64)
X=model(x)
print('model output shape =')
print(X.shape)
model output shape =
torch.Size([1, 2, 64, 64, 64])
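To avoid repeating the layer lists, the three configurations can be wrapped in small helpers; a sketch (these helper names are my own and not part of the original post):

# Sketch (my own addition): convenience constructors for the three encoder depths.
def unet3d_resnet50_encoder(**kwargs):
    return UNet_3d_resnet_encoder(Bottleneck, [3, 4, 6, 3], bilinear=False, **kwargs)

def unet3d_resnet101_encoder(**kwargs):
    return UNet_3d_resnet_encoder(Bottleneck, [3, 4, 23, 3], bilinear=False, **kwargs)

def unet3d_resnet152_encoder(**kwargs):
    return UNet_3d_resnet_encoder(Bottleneck, [3, 8, 36, 3], bilinear=False, **kwargs)

model = unet3d_resnet50_encoder(in_channels=1, num_classes=2, base_c=64)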