TransUNet

最新推荐文章于 2024-06-08 17:27:51 发布

shchojj

最新推荐文章于 2024-06-08 17:27:51 发布

阅读量587

点赞数 1

分类专栏： segmentation 文章标签：深度学习人工智能

原文链接：https://arxiv.org/pdf/2102.04306.pdf

版权

segmentation 专栏收录该内容

32 篇文章 15 订阅

订阅专栏

一、下载基于imagenet21k的vit预训练参数。

二、将数据转换成npz格式，把数值裁剪到[-125,275]范围内，并归一化到[0,1]，将3D volume转换成2D slices。但是测试的时候在h5格式的数据中依旧使用3D volume进行测试。

三、训练预测

np.random.randint(0, 4)//随机整数
np.rot90(image, k)//随机90°旋转
np.flip(image, axis=axis).copy()//沿随机选取的轴翻转
ndimage.rotate(image, angle, order=0, reshape=False)//随机角度旋转
zoom(image, (self.output_size[0] / x, self.output_size[1] / y), order=3)//放缩图像，order为样条插值的阶数（3即三次样条插值）

cudnn.benchmark = True //自动寻找高效算法，来达到优化运行效率
cudnn.deterministic = False //为False时允许cuDNN使用非确定性算法；设为True才会只使用确定性算法，配合下面固定的随机数种子以保证结果可复现

# 随机数种子
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

weights.transpose([3, 2, 0, 1])//convert HWIO to OIHW.
torch.from_numpy(weights)// numpy to torch tensor

//attention机制

self.query = Linear(config.hidden_size, self.all_head_size)
self.key = Linear(config.hidden_size, self.all_head_size)
self.value = Linear(config.hidden_size, self.all_head_size)

  def transpose_for_scores(self, x):
      """Split the last dimension of ``x`` into per-head slices.

      The last dimension is reshaped into
      ``(self.num_attention_heads, self.attention_head_size)`` and the
      resulting 4-D tensor is permuted so that the head axis comes right
      after the first axis.

      Args:
          x: A 3-D tensor whose last dimension equals
              ``num_attention_heads * attention_head_size``.
              Presumably laid out as (batch, tokens, all_head_size) --
              TODO confirm against the caller.

      Returns:
          A 4-D view of ``x`` with axes 1 and 2 swapped, i.e.
          ``(dim0, num_attention_heads, dim1, attention_head_size)``.
      """
      # Keep every leading dimension; replace the last one by (heads, head_size).
      new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
      x = x.view(*new_x_shape)  # reinterpret the tensor with the new dimensions
      return x.permute(0, 2, 1, 3)  # swap axes 1 and 2 so heads precede tokens

query_layer = self.transpose_for_scores(mixed_query_layer)
key_layer = self.transpose_for_scores(mixed_key_layer)
value_layer = self.transpose_for_scores(mixed_value_layer)

attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
attention_scores = attention_scores / math.sqrt(self.attention_head_size)
attention_probs = self.softmax(attention_scores)
attention_probs = self.attn_dropout(attention_probs)

context_layer = torch.matmul(attention_probs, value_layer)
context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
context_layer = context_layer.view(*new_context_layer_shape)

self.out = Linear(config.hidden_size, config.hidden_size)
self.attn_dropout = Dropout(config.transformer["attention_dropout_rate"])
self.proj_dropout = Dropout(config.transformer["attention_dropout_rate"])
attention_output = self.out(context_layer)
attention_output = self.proj_dropout(attention_output)

//MLP

初始化

nn.init.xavier_uniform_(self.fc1.weight)//通过均匀分布使得输入输出的方差相同
nn.init.normal_(self.fc1.bias, std=1e-6)//正态分布

self.fc1 = Linear(config.hidden_size, config.transformer["mlp_dim"])
self.fc2 = Linear(config.transformer["mlp_dim"], config.hidden_size)
self.act_fn = ACT2FN["gelu"]
self.dropout = Dropout(config.transformer["dropout_rate"])

x = self.fc1(x)
x = self.act_fn(x)//GELU激活函数
x = self.dropout(x)
x = self.fc2(x)
x = self.dropout(x)

//PreActBottleneck
y = self.relu(self.gn1(self.conv1(x)))
y = self.relu(self.gn2(self.conv2(y)))
y = self.gn3(self.conv3(y))

y = self.relu(residual + y)

 self.body = nn.Sequential(OrderedDict([
            ('block1', nn.Sequential(OrderedDict(
                [('unit1', PreActBottleneck(cin=width, cout=width*4, cmid=width))] +
                [(f'unit{i:d}', PreActBottleneck(cin=width*4, cout=width*4, cmid=width)) for i in range(2, block_units[0] + 1)],
                ))),
            ('block2', nn.Sequential(OrderedDict(
                [('unit1', PreActBottleneck(cin=width*4, cout=width*8, cmid=width*2, stride=2))] +
                [(f'unit{i:d}', PreActBottleneck(cin=width*8, cout=width*8, cmid=width*2)) for i in range(2, block_units[1] + 1)],
                ))),
            ('block3', nn.Sequential(OrderedDict(
                [('unit1', PreActBottleneck(cin=width*8, cout=width*16, cmid=width*4, stride=2))] +
                [(f'unit{i:d}', PreActBottleneck(cin=width*16, cout=width*16, cmid=width*4)) for i in range(2, block_units[2] + 1)],
                ))),
        ]))

//embedding

计算patch大小、个数

patch_size = (img_size[0] // 16 // grid_size[0], img_size[1] // 16 // grid_size[1])
patch_size_real = (patch_size[0] * 16, patch_size[1] * 16)
n_patches = (img_size[0] // patch_size_real[0]) * (img_size[1] // patch_size_real[1]) 
_pair//将单个数值x转换成二元组(x, x)，例如 _pair(16) 得到 (16, 16)

//patch embedding

patch_embeddings = Conv2d(in_channels=in_channels,
                                       out_channels=config.hidden_size,
                                       kernel_size=patch_size,
                                       stride=patch_size)

//position embedding

 self.position_embeddings = nn.Parameter(torch.zeros(1, n_patches, config.hidden_size))

x = self.patch_embeddings(x)  # (B, hidden, n_patches^(1/2), n_patches^(1/2))
x = x.flatten(2)
x = x.transpose(-1, -2)  # (B, n_patches, hidden)
embeddings = x + self.position_embeddings
embeddings = self.dropout(embeddings)

//block

self.hidden_size = config.hidden_size
self.attention_norm = LayerNorm(config.hidden_size, eps=1e-6)
self.ffn_norm = LayerNorm(config.hidden_size, eps=1e-6)
self.ffn = Mlp(config)
self.attn = Attention(config, vis)

h = x
x = self.attention_norm(x)
x, weights = self.attn(x)
x = x + h

h = x
x = self.ffn_norm(x)
x = self.ffn(x)
x = x + h

//Encoder

for _ in range(config.transformer["num_layers"]):
    layer = Block(config, vis)
     self.layer.append(copy.deepcopy(layer))

//Transformer

 embedding_output, features = self.embeddings(input_ids)
encoded, attn_weights = self.encoder(embedding_output)  # (B, n_patch, hidden)

//DecoderBlock

 x = self.up(x)//UpsamplingBilinear2d
if skip is not None:
   x = torch.cat([x, skip], dim=1)
x = self.conv1(x)//conv, bn, relu
x = self.conv2(x)

//SegmentationHead

conv2d, upsampling

//DecoderCup

blocks = [
            DecoderBlock(in_ch, out_ch, sk_ch) for in_ch, out_ch, sk_ch in zip(in_channels, out_channels, skip_channels)
        ]
 for i, decoder_block in enumerate(self.blocks):
            if features is not None:
                skip = features[i] if (i < self.config.n_skip) else None
            else:
                skip = None
            x = decoder_block(x, skip=skip)

//VisionTransformer

 if x.size()[1] == 1:
    x = x.repeat(1,3,1,1)
x, attn_weights, features = self.transformer(x)  # (B, n_patch, hidden)
x = self.decoder(x, features)
logits = self.segmentation_head(x)

// train

trainloader = DataLoader(db_train, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True,
                             worker_init_fn=worker_init_fn)
    if args.n_gpu > 1:
        model = nn.DataParallel(model)
    model.train()
    ce_loss = CrossEntropyLoss()
    dice_loss = DiceLoss(num_classes)
    optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001)
    writer = SummaryWriter(snapshot_path + '/log')//
//

    iterator = tqdm(range(max_epoch), ncols=70)
    for epoch_num in iterator:
        for i_batch, sampled_batch in enumerate(trainloader):
            image_batch, label_batch = sampled_batch['image'], sampled_batch['label']
            image_batch, label_batch = image_batch.cuda(), label_batch.cuda()
            outputs = model(image_batch)
            loss_ce = ce_loss(outputs, label_batch[:].long())
            loss_dice = dice_loss(outputs, label_batch, softmax=True)
            loss = 0.5 * loss_ce + 0.5 * loss_dice
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

      torch.save(model.state_dict(), save_mode_path)
      iterator.close()

shchojj

关注

1
点赞
踩
4

收藏

觉得还不错? 一键收藏
0
评论
TransUNet

一、下载基于imagenet21k的vit预训练参数。二、将数据转换成npz格式，裁剪数值范围在[-125,275]之间的数据，并归一化到[0,1]，将3D volume转换成2D slices。但是测试的时候在h5格式的数据中依旧使用3D volume进行测试。三、训练预测//attention机制 //MLP初始化// //embedding计算patch大小、个数//patch embedding //pos
复制链接

扫一扫

专栏目录