# Transformer-based object detection — minimal DETR implementation

import torch
from torch import nn
from torchvision.models import resnet50

class DETR(nn.Module):
    """Minimal DETR (DEtection TRansformer) object-detection demo.

    A ResNet-50 backbone feeds a 1x1 conv projection into a standard
    ``nn.Transformer``; 100 learned object queries are decoded into class
    logits and normalized bounding boxes (Carion et al., "End-to-End Object
    Detection with Transformers", 2020).

    Args:
        num_classes: number of foreground classes (a +1 "no object" slot is added).
        hidden_dim: transformer model width (must be divisible by ``nheads``).
        nheads: number of attention heads.
        num_encoder_layers / num_decoder_layers: transformer depth.
    """

    def __init__(self, num_classes, hidden_dim, nheads,
                 num_encoder_layers, num_decoder_layers):
        super().__init__()
        # ResNet-50 with its avgpool/fc head stripped -> [B, 2048, H/32, W/32].
        self.backbone = nn.Sequential(*list(resnet50(pretrained=False).children())[:-2])
        # 1x1 conv projects the 2048 backbone channels down to the transformer width.
        self.conv = nn.Conv2d(2048, hidden_dim, 1)
        self.transformer = nn.Transformer(hidden_dim, nheads,
                                          num_encoder_layers, num_decoder_layers)
        # +1 output for the "no object" (background) class.
        self.linear_class = nn.Linear(hidden_dim, num_classes + 1)
        self.linear_bbox = nn.Linear(hidden_dim, 4)
        # 100 learned object queries; row/col halves concatenate into a 2D
        # positional encoding (supports feature maps up to 100x100).
        self.query_pos = nn.Parameter(torch.rand(100, hidden_dim))
        self.row_embed = nn.Parameter(torch.rand(100, hidden_dim // 2))
        self.col_embed = nn.Parameter(torch.rand(100, hidden_dim // 2))

    def forward(self, inputs):
        """Run detection on an image batch.

        Args:
            inputs: image tensor [B, 3, H, W].

        Returns:
            (class_logits, boxes): [100, B, num_classes + 1] raw logits and
            [100, B, 4] boxes squashed into [0, 1] by a sigmoid.
        """
        x = self.backbone(inputs)            # [B, 2048, H/32, W/32]
        h = self.conv(x)                     # [B, hidden_dim, H', W']
        H, W = h.shape[-2:]                  # spatial size of the feature map
        # Column/row embeddings tiled and concatenated into [H'*W', 1, hidden_dim];
        # the singleton dim broadcasts over the batch when added to the features.
        pos = torch.cat([
            self.col_embed[:W].unsqueeze(0).repeat(H, 1, 1),
            self.row_embed[:H].unsqueeze(1).repeat(1, W, 1),
        ], dim=-1).flatten(0, 1).unsqueeze(1)
        # Fix: repeat the object queries to the batch size. The original
        # hard-coded a batch of 1 (unsqueeze(1) only), so nn.Transformer
        # rejected any input with B > 1 due to a src/tgt batch mismatch.
        queries = self.query_pos.unsqueeze(1).repeat(1, inputs.shape[0], 1)
        h = self.transformer(pos + h.flatten(2).permute(2, 0, 1),
                             queries)        # [100, B, hidden_dim]
        return self.linear_class(h), self.linear_bbox(h).sigmoid()

if __name__ == "__main__":
    # Smoke test: build the model and push one random 640x640 image through it.
    detr = DETR(num_classes=80, hidden_dim=256, nheads=8,
                num_encoder_layers=6, num_decoder_layers=6)
    detr.eval()  # inference mode (disables dropout in the transformer)
    inputs = torch.rand(1, 3, 640, 640)
    with torch.no_grad():  # forward-only demo; no gradients needed
        logits, bboxes = detr(inputs)
    # Expected: torch.Size([100, 1, 81]) torch.Size([100, 1, 4])
    print(logits.shape, bboxes.shape)

 

# (Removed: CSDN blog-page boilerplate accidentally captured with the source;
# it was not Python and prevented the file from being imported or run.)