Traceback (most recent call last):
File "/home/gcl/project/CSCA-main/train.py", line 80, in <module>
trainer.train()
File "/home/gcl/project/CSCA-main/utils/regression_trainer.py", line 139, in train
self.train_eopch()
File "/home/gcl/project/CSCA-main/utils/regression_trainer.py", line 166, in train_eopch
outputs = self.model(inputs, self.dataset)
File "/home/gcl/anaconda3/envs/vmamba/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/gcl/project/CSCA-main/models/builder.py", line 155, in forward
out = self.encode_decode(inputs)
File "/home/gcl/project/CSCA-main/models/builder.py", line 135, in encode_decode
x = self.backbone(rgb, modal_x)
File "/home/gcl/anaconda3/envs/vmamba/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/gcl/project/CSCA-main/models/encoders/dual_vmamba.py", line 110, in forward
out = self.forward_features(x_rgb, x_e)
File "/home/gcl/project/CSCA-main/models/encoders/dual_vmamba.py", line 100, in forward_features
cross_rgb, cross_x = self.cross_mamba[i](out_rgb.permute(0, 2, 3, 1).contiguous(), out_x.permute(0, 2, 3, 1).contiguous()) # B x H x W x C
File "/home/gcl/anaconda3/envs/vmamba/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/gcl/project/CSCA-main/models/encoders/vmamba.py", line 1870, in forward
return self._forward(x_rgb, x_e)
File "/home/gcl/project/CSCA-main/models/encoders/vmamba.py", line 1858, in _forward
x_rgb_cross, x_e_cross = self.op(x_rgb, x_e)
File "/home/gcl/anaconda3/envs/vmamba/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/gcl/project/CSCA-main/models/encoders/vmamba.py", line 1633, in forward
y_rgb, y_e = self.CMA_ssm(x_rgb_conv, x_e_conv)
File "/home/gcl/anaconda3/envs/vmamba/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/gcl/project/CSCA-main/models/encoders/vmamba.py", line 1509, in forward
selective_scan = selective_scan_fn_v1
NameError: name 'selective_scan_fn_v1' is not defined. Did you mean: 'selective_scan'?
class EncoderDecoder(nn.Module):
    """Two-branch (RGB + auxiliary modality) encoder-decoder model.

    Selects a backbone and a decode head from ``args``, then produces a
    prediction map at the input's spatial resolution in :meth:`forward`.

    Args:
        args: namespace carrying ``backbone``, ``decoder``, ``num_classes``,
            ``pretrained_model`` and (decoder-dependent) ``decoder_embed_dim``,
            ``image_height``, ``image_width``.
        criterion: loss module; weight init is only run when it is truthy
            (preserved from the original code).
        norm_layer: normalization layer handed to backbone/decoder builders.
    """

    def __init__(self, args,
                 criterion=nn.CrossEntropyLoss(reduction='mean', ignore_index=255),
                 norm_layer=nn.BatchNorm2d):
        super(EncoderDecoder, self).__init__()
        self.channels = [64, 128, 320, 512]
        self.norm_layer = norm_layer
        # Fix: default to no deep supervision. Previously this attribute was
        # assigned only inside the MambaDecoder branch, so every other decoder
        # choice made encode_decode()/forward() raise AttributeError.
        self.deep_supervision = False

        # ---- backbone selection ----
        if args.backbone == 'swin_s':
            logger.info('Using backbone: Swin-Transformer-small')
            from .encoders.dual_swin import swin_s as backbone
            self.channels = [96, 192, 384, 768]
            self.backbone = backbone(norm_fuse=norm_layer)
        elif args.backbone == 'sigma_tiny':
            logger.info('Using backbone: V-MAMBA')
            self.channels = [96, 192, 384, 768]
            from .encoders.dual_vmamba import vssm_tiny as backbone
            self.backbone = backbone()
        elif args.backbone == 'sigma_small':
            logger.info('Using backbone: V-MAMBA')
            self.channels = [96, 192, 384, 768]
            from .encoders.dual_vmamba import vssm_small as backbone
            self.backbone = backbone()
        elif args.backbone == 'sigma_base':
            logger.info('Using backbone: V-MAMBA')
            self.channels = [128, 256, 512, 1024]
            from .encoders.dual_vmamba import vssm_base as backbone
            self.backbone = backbone()
        else:
            # Fallback mirrors the original: any unrecognized name gets Segformer-B2.
            logger.info('Using backbone: Segformer-B2')
            from .encoders.dual_segformer import mit_b2 as backbone
            self.backbone = backbone(norm_fuse=norm_layer)

        self.aux_head = None

        # ---- decoder selection ----
        if args.decoder == 'MLPDecoder':
            logger.info('Using MLP Decoder')
            from .decoders.MLPDecoder import DecoderHead
            self.decode_head = DecoderHead(in_channels=self.channels,
                                           num_classes=args.num_classes,
                                           norm_layer=norm_layer,
                                           embed_dim=args.decoder_embed_dim)
        elif args.decoder == 'MambaDecoder':
            logger.info('Using Mamba Decoder')
            from .decoders.MambaDecoder import MambaDecoder
            self.deep_supervision = False
            self.decode_head = MambaDecoder(img_size=[args.image_height, args.image_width],
                                            in_channels=self.channels,
                                            num_classes=args.num_classes,
                                            embed_dim=self.channels[0],
                                            deep_supervision=self.deep_supervision)
        else:
            logger.info('No decoder(FCN-32s)')
            from .decoders.fcnhead import FCNHead
            self.decode_head = FCNHead(in_channels=self.channels[-1], kernel_size=3,
                                       num_classes=args.num_classes, norm_layer=norm_layer)

        self.criterion = criterion
        if self.criterion:
            self.init_weights(args, pretrained=args.pretrained_model)

    def init_weights(self, cfg, pretrained=None):
        """Load backbone weights (if given) and kaiming-init the decode/aux heads."""
        if pretrained:
            # NOTE(review): 'vmamba' is compared here but the constructor uses
            # 'sigma_*' backbone names — this guard may never trigger; confirm.
            if cfg.backbone != 'vmamba':
                logger.info('Loading pretrained model: {}'.format(pretrained))
                self.backbone.init_weights(pretrained=pretrained)
        logger.info('Initing weights ...')
        init_weight(self.decode_head, nn.init.kaiming_normal_,
                    self.norm_layer, cfg.bn_eps, cfg.bn_momentum,
                    mode='fan_in', nonlinearity='relu')
        if self.aux_head:
            init_weight(self.aux_head, nn.init.kaiming_normal_,
                        self.norm_layer, cfg.bn_eps, cfg.bn_momentum,
                        mode='fan_in', nonlinearity='relu')

    def encode_decode(self, inputs):
        """Encode images with backbone and decode into a semantic segmentation
        map of the same size as input.

        ``inputs`` is a pair: inputs[0] = RGB tensor, inputs[1] = the
        auxiliary modality tensor.
        """
        rgb = inputs[0]
        modal_x = inputs[1]
        if not self.deep_supervision:
            orisize = rgb.shape
            x = self.backbone(rgb, modal_x)
            out = self.decode_head.forward(x)
            # Upsample the head output back to the input resolution.
            out = F.interpolate(out, size=orisize[2:], mode='bilinear', align_corners=False)
            if self.aux_head:
                aux_fm = self.aux_head(x[self.aux_index])
                aux_fm = F.interpolate(aux_fm, size=orisize[2:], mode='bilinear',
                                       align_corners=False)
                return out, aux_fm
            return out
        x = self.backbone(rgb, modal_x)
        x_last, x_output_0, x_output_1, x_output_2 = self.decode_head.forward(x)
        return x_last, x_output_0, x_output_1, x_output_2

    def forward(self, inputs, label=None):
        """Run the full model; ``label`` is accepted for API compatibility but
        unused here (loss computation lives in the trainer)."""
        if not self.deep_supervision:
            if self.aux_head:
                # aux_fm is discarded here (original behavior): the aux loss
                # code was commented out upstream.
                out, aux_fm = self.encode_decode(inputs)
            else:
                out = self.encode_decode(inputs)
            return out
        # Fix: the original fell off the end and returned None in the
        # deep-supervision case; forward the decoder's four outputs instead.
        return self.encode_decode(inputs)
class FCNHead(nn.Module): def __init__(self, in_channels=384, channels=None, kernel_size=3, dilation=1, num_classes=40, norm_layer=nn.BatchNorm2d): super(FCNHead, self).__init__() self.kernel_size = kernel_size self.in_channels = in_channels self.channels = channels or in_channels // 4 conv_padding = (kernel_size // 2) * dilation self.conv = nn.Sequential( nn.Conv2d(self.in_channels, self.channels, kernel_size, padding=conv_padding), norm_layer(self.channels), nn.ReLU(inplace=True) ) self.classifier = nn.Conv2d(self.channels, 1, kernel_size=1) def forward(self, x): output = self.conv(x) output = self.classifier(output) return output
import torch
import torch.nn as nn
import torch.nn.functional as F


class MambaDecoder(nn.Module):
    """Mamba-based upsampling decoder over a 4-level feature pyramid.

    Consumes backbone features (deepest last), expands them stage by stage
    with PatchExpand / Mamba_up blocks, and emits either a single prediction
    map or, with deep supervision, three extra side outputs.
    """

    def __init__(self, img_size=[480, 640], in_channels=[96, 192, 384, 768],
                 num_classes=1, dropout_ratio=0.1, embed_dim=96,
                 align_corners=False, patch_size=4, depths=[4, 4, 4, 4],
                 mlp_ratio=4., drop_rate=0.0, attn_drop_rate=0.,
                 drop_path_rate=0.1, norm_layer=nn.LayerNorm,
                 use_checkpoint=False, deep_supervision=False, **kwargs):
        super().__init__()
        self.num_classes = num_classes
        self.num_layers = len(depths)
        self.mlp_ratio = mlp_ratio
        self.patch_size = patch_size
        self.patches_resolution = [img_size[0] // patch_size,
                                   img_size[1] // patch_size]
        self.deep_supervision = deep_supervision

        # Stochastic-depth rates, linearly spaced over all stages' blocks.
        dpr = [r.item() for r in torch.linspace(0, drop_path_rate, sum(depths))]

        self.layers_up = nn.ModuleList()
        for i_layer in range(self.num_layers):
            rev = self.num_layers - 1 - i_layer  # stage index from the deepest level
            stage_res = (self.patches_resolution[0] // (2 ** rev),
                         self.patches_resolution[1] // (2 ** rev))
            stage_dim = int(embed_dim * 2 ** rev)
            if i_layer == 0:
                # First stage only expands the deepest feature map.
                stage = PatchExpand(input_resolution=stage_res, dim=stage_dim,
                                    dim_scale=2, norm_layer=norm_layer)
            else:
                stage = Mamba_up(dim=stage_dim,
                                 input_resolution=stage_res,
                                 depth=depths[rev],
                                 mlp_ratio=self.mlp_ratio,
                                 drop=drop_rate,
                                 attn_drop=attn_drop_rate,
                                 drop_path=dpr[sum(depths[:rev]):sum(depths[:rev + 1])],
                                 norm_layer=norm_layer,
                                 upsample=PatchExpand if (i_layer < self.num_layers - 1) else None,
                                 use_checkpoint=use_checkpoint)
            self.layers_up.append(stage)

        self.norm_up = norm_layer(embed_dim)
        if self.deep_supervision:
            self.norm_ds = nn.ModuleList(
                [norm_layer(embed_dim * 2 ** (self.num_layers - 2 - i))
                 for i in range(self.num_layers - 1)])
            self.output_ds = nn.ModuleList(
                [nn.Conv2d(in_channels=embed_dim * 2 ** (self.num_layers - 2 - i),
                           out_channels=self.num_classes, kernel_size=1, bias=False)
                 for i in range(self.num_layers - 1)])
        # Strided conv that halves the spatial size of the final feature map.
        self.reduce_conv = nn.Conv2d(in_channels=embed_dim, out_channels=embed_dim,
                                     kernel_size=3, stride=2, padding=1, bias=False)
        self.output = nn.Conv2d(in_channels=embed_dim, out_channels=self.num_classes,
                                kernel_size=1, bias=False)

    def forward_up_features(self, inputs):
        """Walk the up-stages from the deepest feature (inputs[3]) to the shallowest.

        Features are handled channels-last (B, H, W, C) between stages.
        """
        if not self.deep_supervision:
            y = None
            for idx, stage in enumerate(self.layers_up):
                skip = inputs[3 - idx]
                if idx == 0:
                    # Deepest level: NCHW -> NHWC, then expand.
                    y = stage(skip.permute(0, 2, 3, 1).contiguous())
                else:
                    B, C, H, W = skip.shape
                    # Resize the running feature to the skip's spatial size
                    # before fusing (handles non-power-of-two resolutions).
                    y_nchw = y.permute(0, 3, 1, 2).contiguous()
                    y = F.interpolate(y_nchw, size=(H, W), mode='bilinear',
                                      align_corners=False).permute(0, 2, 3, 1).contiguous()
                    y = stage(y + skip.permute(0, 2, 3, 1).contiguous())
            return self.norm_up(y)

        # Deep-supervision path: also collect normalized intermediate features.
        # NOTE(review): unlike the branch above, no interpolation is done here,
        # so stage outputs are assumed to match the skips' sizes exactly — confirm.
        x_upsample = []
        y = None
        for idx, stage in enumerate(self.layers_up):
            skip = inputs[3 - idx]
            if idx == 0:
                y = stage(skip.permute(0, 2, 3, 1).contiguous())
                x_upsample.append(self.norm_ds[idx](y))
            else:
                y = stage(y + skip.permute(0, 2, 3, 1).contiguous())
                if idx != self.num_layers - 1:
                    x_upsample.append(self.norm_ds[idx](y))
        return self.norm_up(y), x_upsample

    def forward(self, inputs):
        """Decode pyramid ``inputs`` into prediction map(s)."""
        if not self.deep_supervision:
            feat = self.forward_up_features(inputs)       # B, H, W, C
            feat = feat.permute(0, 3, 1, 2).contiguous()  # B, C, H, W
            feat = self.reduce_conv(feat)                 # halve the spatial size
            return self.output(feat)                      # project to num_classes channels

        feat, x_upsample = self.forward_up_features(inputs)
        x_last = feat.permute(0, 3, 1, 2).contiguous()    # B, C, H, W
        x_last = self.output(self.reduce_conv(x_last))
        # Side outputs: upsample each collected feature by a fixed factor,
        # then project to num_classes channels.
        side = []
        for i, factor in enumerate((16, 8, 4)):
            up = F.interpolate(x_upsample[i].permute(0, 3, 1, 2).contiguous(),
                               scale_factor=factor, mode='bilinear',
                               align_corners=False)
            side.append(self.output_ds[i](up))
        return x_last, side[0], side[1], side[2]
Traceback (most recent call last):
File "/home/gcl/project/CSCA-main/train.py", line 79, in <module>
trainer.setup()
File "/home/gcl/project/CSCA-main/utils/regression_trainer.py", line 98, in setup
self.model = segmodel(args, criterion=criterion, norm_layer=BatchNorm2d)
File "/home/gcl/project/CSCA-main/models/builder.py", line 62, in __init__
from .encoders.dual_vmamba import vssm_small as backbone
File "/home/gcl/project/CSCA-main/models/encoders/dual_vmamba.py", line 12, in <module>
from models.encoders.vmamba import Backbone_VSSM, CrossMambaFusionBlock, ConcatMambaFusionBlock
File "/home/gcl/project/CSCA-main/models/encoders/vmamba.py", line 32, in <module>
import selective_scan_cuda_core as selective_scan_cuda
ImportError: /home/gcl/anaconda3/envs/vmunet/lib/python3.10/site-packages/selective_scan_cuda_core.cpython-310-x86_64-linux-gnu.so: undefined symbol: _ZN3c104cuda9SetDeviceEi