夯实基础系列:文本检测算法DBNet核心代码torch实现

核心代码(可以直接运行查看各个Shape)
  • 参考PaddleOCR中configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml的实现
  • 以下代码只给出了搭建模型(DBHead)、前向计算以及训练损失计算的代码,前后处理代码均已省略。
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: liekkaskono@163.com
import torch

from torch import nn


class DBHead(nn.Module):
    """Differentiable Binarization (DB) head.

    Two structurally parallel branches, ``binarize`` and ``thresh``, each apply
    a 3x3 convolution followed by two stride-2 transposed convolutions and a
    sigmoid. Each branch therefore outputs a single-channel map whose spatial
    size is 4x that of the input, with values in (0, 1).

    Args:
        in_channels: Number of channels of the input feature map.
        k: Steepness factor of the approximate step function.
        is_train: When True, ``forward`` additionally returns the approximate
            binary map. This flag is independent of ``nn.Module.training``.
    """

    def __init__(self, in_channels: int, k: float = 50, is_train: bool = False):
        super().__init__()

        self.k = k
        self.is_train = is_train

        mid_channels = in_channels // 4

        # Shrink (probability) map branch. Note: the first convolution keeps
        # its bias here, unlike the threshold branch below.
        self.binarize = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, 3, padding=1),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),

            # Transposed convolution (kernel 2, stride 2): 2x upsampling.
            nn.ConvTranspose2d(mid_channels, mid_channels, 2, 2),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),

            # Second 2x upsampling, down to a single output channel.
            nn.ConvTranspose2d(mid_channels, 1, 2, 2),
            nn.Sigmoid(),
        )

        # Threshold map branch: same layout, first convolution without bias.
        self.thresh = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),

            nn.ConvTranspose2d(mid_channels, mid_channels, 2, 2),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),

            nn.ConvTranspose2d(mid_channels, 1, 2, 2),
            nn.Sigmoid(),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute the DB maps for a feature map ``x`` of shape [N, C, H, W].

        Returns:
            A tensor of shape [N, 3, 4H, 4W] (shrink, threshold, binary) when
            ``is_train`` is True, otherwise [N, 2, 4H, 4W] (shrink, threshold).
            Example: x [1, 96, 240, 240] -> [1, 3, 960, 960] / [1, 2, 960, 960].
        """
        shrink_maps = self.binarize(x)
        threshold_maps = self.thresh(x)

        if self.is_train:
            binary_maps = self.step_function(shrink_maps, threshold_maps)
            return torch.cat((shrink_maps, threshold_maps, binary_maps), dim=1)

        return torch.cat((shrink_maps, threshold_maps), dim=1)

    def step_function(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        """Approximate step function ``1 / (1 + exp(-k * (x - y)))``.

        Evaluated with ``torch.sigmoid``, which is the same function but does
        not overflow in ``exp`` (and so does not yield NaN gradients) when
        ``k * (y - x)`` is large or reduced precision is used.
        """
        return torch.sigmoid(self.k * (x - y))


class MaskL1Loss(nn.Module):
    """Masked L1 loss: mean absolute error over the positions selected by a mask.

    Args:
        eps: Small constant added to the mask sum so an all-zero mask does not
            divide by zero.
    """

    def __init__(self, eps: float = 1e-6) -> None:
        super().__init__()
        self.eps = eps

    def forward(self, pred, gt, mask):
        """Return ``sum(|pred - gt| * mask) / (sum(mask) + eps)`` as a scalar tensor.

        ``pred``, ``gt`` and ``mask`` are combined elementwise, so they must be
        broadcast-compatible.
        """
        masked_abs_error = (torch.abs(pred - gt) * mask).sum()
        # The quotient is already a scalar, so no further reduction is needed.
        return masked_abs_error / (mask.sum() + self.eps)


class DiceLoss(nn.Module):
    """Masked Dice loss: ``1 - 2 * |pred ∩ gt| / (|pred| + |gt|)``.

    Args:
        eps: Small constant added to the denominator so that empty inputs do
            not divide by zero.
    """

    def __init__(self, eps: float = 1e-6) -> None:
        super().__init__()
        self.eps = eps

    def forward(self, pred, gt, mask):
        """Return the Dice loss over the masked positions as a scalar tensor.

        ``pred``, ``gt`` and ``mask`` are combined elementwise, so they must be
        broadcast-compatible.
        """
        intersection = torch.sum(pred * gt * mask)
        union = torch.sum(pred * mask) + torch.sum(gt * mask) + self.eps
        return 1 - 2.0 * intersection / union


class BalanceLoss(nn.Module):
    """Wrap a main loss and balance positive against hard-negative contributions.

    The number of negatives that contribute is capped at
    ``negative_ratio * positive_count``; the negatives with the largest loss
    values are the ones kept.

    Args:
        negative_ratio: Maximum number of negatives kept per positive.
        main_loss_type: Name of the wrapped loss. Only ``'DiceLoss'`` is
            supported.

    Raises:
        ValueError: If ``main_loss_type`` is not supported.
    """

    def __init__(self, negative_ratio=3, main_loss_type='DiceLoss'):
        super().__init__()
        self.negative_ratio = negative_ratio
        self.eps = 1e-6
        # Fail at construction instead of leaving ``self.loss`` undefined and
        # hitting an AttributeError on the first forward call.
        if main_loss_type != 'DiceLoss':
            raise ValueError(
                f"Unsupported main_loss_type {main_loss_type!r}; "
                "only 'DiceLoss' is supported."
            )
        self.loss = DiceLoss()

    def forward(self, pred, gt, mask=None):
        """Return the balanced loss as a scalar tensor.

        Args:
            pred: Predicted map.
            gt: Ground-truth map; positives are counted as ``sum(gt * mask)``,
                so it is expected to be binary (0/1).
            mask: Optional map selecting the positions that take part in the
                loss. ``None`` means every position is used.
        """
        if mask is None:
            mask = torch.ones_like(gt)

        positive = gt * mask
        negative = (1 - gt) * mask

        positive_count = int(positive.sum())
        negative_count = min(int(negative.sum()),
                             int(positive_count * self.negative_ratio))

        loss = self.loss(pred, gt, mask=mask)
        positive_loss = positive * loss
        negative_loss = negative * loss

        if negative_count <= 0:
            return positive_loss.sum() / (positive_count + self.eps)

        # Keep only the ``negative_count`` largest negative loss values.
        hardest_negatives = negative_loss.reshape(-1).sort(
            descending=True)[0][:negative_count]
        return (positive_loss.sum() + hardest_negatives.sum()) / \
            (positive_count + negative_count + self.eps)


if __name__ == '__main__':
    # Demo: run the DB head on a dummy feature map and compute the combined
    # training loss, to inspect the tensor shapes involved.
    #
    # x stands for the feature map obtained from a 3x960x960 input image after
    # the backbone and FPN (MobileNetV3 large 0.5, DBFPN out_channels: 96).
    x = torch.randn(1, 96, 240, 240)

    # Dummy labels. The predictions are sigmoid outputs in (0, 1), so the
    # threshold label is drawn from [0, 1); the shrink label is binary because
    # BalanceLoss counts positives by summing it.
    label_threshold_map = torch.rand(1, 960, 960)
    label_threshold_mask = torch.randint(low=0, high=2, size=(1, 960, 960))
    label_shrink_map = torch.randint(low=0, high=2, size=(1, 960, 960)).float()
    label_shrink_mask = torch.randint(low=0, high=2, size=(1, 960, 960))

    # The second positional parameter of DBHead is the step-function steepness
    # k, so it is passed by keyword to avoid confusing it with a channel count.
    model = DBHead(96, k=50, is_train=True)
    y = model(x)  # [1, 3, 960, 960]

    l1_loss = MaskL1Loss()
    dice_loss = DiceLoss()
    balance_loss = BalanceLoss()

    # Split the channels: shrink, threshold, binary -> each [1, 960, 960].
    shrink_maps = y[:, 0, :, :]
    threshold_maps = y[:, 1, :, :]
    binary_maps = y[:, 2, :, :]

    loss_threshold_maps = l1_loss(threshold_maps,
                                  label_threshold_map,
                                  label_threshold_mask)
    loss_shrink_maps = balance_loss(shrink_maps,
                                    label_shrink_map,
                                    label_shrink_mask)
    loss_binary_maps = dice_loss(binary_maps,
                                 label_shrink_map,
                                 label_shrink_mask)

    # Weights of the shrink-map and threshold-map terms in the total loss.
    alpha, beta = 5, 10

    loss_shrink_maps = alpha * loss_shrink_maps
    loss_threshold_maps = beta * loss_threshold_maps
    loss_all = loss_shrink_maps + loss_threshold_maps + loss_binary_maps

    print(loss_all)

  • shrink_map作用:用作和pred结果计算差距
  • shrink_mask作用:让损失计算聚焦于mask部分,示例代码:torch.abs(pred - gt) * mask
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值