Paper: https://arxiv.org/pdf/1904.08900
Code: https://github.com/princeton-vl/CornerNet-Lite
1. Network structure:
The network structure is not described clearly; the paper alone is not enough to picture the architecture:
it spends long passages on how large, medium, and small objects are rescaled differently and on using kernels of size 3 and 1, yet you still cannot tell what the structure actually looks like. Fortunately the code is public.
In the code, though, I could not find any separate rescaling for large, medium, and small objects; it just stacks a few branch networks. Does that really count?
As for convolution and residual, they are just a plain conv block and a conv block with a skip connection added.
_pred_mod is the prediction head: with 80 output channels, for example, it corresponds to the 80 classes.
At this point the network structure is actually clear, namely
3->256->256->384->384->384->512
256<-256<-384<-384<-384
Each forward arrow halves the resolution because of an hg_layer, and on the way back the features are upsampled so the resolution returns to what it was.
Finally the 256-channel feature map is run through convolution and normalization, and then used for prediction and regression.
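To make those arrows concrete, below is a minimal PyTorch sketch of the down/up pattern, under the assumption that each step down is a stride-2 conv block and each step up is a nearest-neighbor upsample plus a skip connection. The real hg module in the repo is recursive and built from residual blocks, so the names and details here are only illustrative, not the repo's code.

import torch.nn as nn
import torch.nn.functional as F

def conv_bn_relu(c_in, c_out, stride=1):
    # plain 3x3 conv -> BN -> ReLU, in the spirit of the "convolution" block above
    return nn.Sequential(
        nn.Conv2d(c_in, c_out, 3, stride=stride, padding=1, bias=False),
        nn.BatchNorm2d(c_out),
        nn.ReLU(inplace=True),
    )

class HourglassSketch(nn.Module):
    # channel progression from the arrows: 256 -> 256 -> 384 -> 384 -> 384 -> 512 and back
    def __init__(self, dims=(256, 256, 384, 384, 384, 512)):
        super().__init__()
        n = len(dims) - 1
        self.downs = nn.ModuleList([conv_bn_relu(dims[i], dims[i + 1], stride=2) for i in range(n)])
        self.skips = nn.ModuleList([conv_bn_relu(dims[i], dims[i]) for i in range(n)])
        self.ups = nn.ModuleList([conv_bn_relu(dims[i + 1], dims[i]) for i in range(n)])

    def forward(self, x):
        saved = []
        for down, skip in zip(self.downs, self.skips):
            saved.append(skip(x))   # lateral branch kept at the current resolution
            x = down(x)             # the hg_layer step: resolution halves
        for up, s in zip(reversed(self.ups), reversed(saved)):
            x = F.interpolate(x, scale_factor=2, mode="nearest")  # resolution doubles back
            x = up(x) + s           # channels come back down, skip connection added
        return x                    # 256 channels again, at the input resolution

# a _pred_mod-style head: a conv block followed by a 1x1 conv, e.g. 80 channels for 80 classes
def pred_head(num_out, dim=256):
    return nn.Sequential(conv_bn_relu(dim, dim), nn.Conv2d(dim, num_out, 1))

So heatmaps would come from something like pred_head(80)(HourglassSketch()(features)), which is all that is meant by taking the 256-channel output through convolution, normalization, prediction and regression.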
corner_pool:
The pool1 and pool2 inside it correspond to (TopPool, LeftPool) and (BottomPool, RightPool) respectively and are implemented in C++; take top_pool as an example:
These two are discussed in this blog post, https://blog.csdn.net/dgyuanshaofeng/article/details/82048113, reposted below:
#include <torch/torch.h>
// https://github.com/pytorch/pytorch/tree/master/torch/csrc/api/include/torch
#include <vector>
std::vector<at::Tensor> top_pool_forward( // forward pass
at::Tensor input // input tensor
) {
// Initialize output
at::Tensor output = at::zeros_like(input); // output, initialized to zeros
// Get height
int64_t height = input.size(2); // input is [n, c, h, w]; take the size of h
// for a 3D version this is probably the place to change, since the layout becomes [n, c, d, h, w]
// Copy the last column
at::Tensor input_temp = input.select(2, height - 1);
// https://pytorch.org/docs/0.4.1/tensors.html#torch.Tensor.select
// select(dim, index) → Tensor
// Slices the self tensor along the selected dimension at the given index.
// This actually selects the last row: dim 2 is the height dimension of [n, c, h, w], so for an input of shape [1, 3, 3, 5] input_temp has shape [1, 3, 5]. It is not a C++/Python indexing difference; the upstream comment says "column" but the slice is the bottom row.
// take/select the last row, tensor[:,:,height-1,:]
at::Tensor output_temp = output.select(2, height - 1);
// take/select the last row of the output, tensor[:,:,height-1,:]
output_temp.copy_(input_temp);
// https://pytorch.org/docs/0.4.1/tensors.html#torch.Tensor.copy_
// copy_(src, non_blocking=False) → Tensor
// Copies the elements from src into self tensor and returns self.
at::Tensor max_temp;
for (int64_t ind = 1; ind < height; ++ind) {
input_temp = input.select(2, height - ind - 1);
// from bottom to top
// starting from the second-to-last row
output_temp = output.select(2, height - ind);
// from bottom to top
// starting from the last row
max_temp = output.select(2, height - ind - 1);
// from bottom to top
// starting from the second-to-last row
at::max_out(max_temp, input_temp, output_temp);
// https://pytorch.org/cppdocs/api/function_namespaceat_1ab40751edb25d9ed68d4baa5047564a89.html
// static Tensor &at::max_out(Tensor &out, const Tensor &self, const Tensor &other)
// max_temp starts at the second-to-last row of output, so it is all zeros at first;
// input_temp is the second-to-last row of the input, generally non-zero;
// output_temp is the last row of output, already filled with the input's last row;
// input_temp and output_temp are compared element-wise and the larger value is written into output; Fig. 6 of the paper visualizes this computation
}
return {
output // returns the top-pooled map for the top-left corner; the left map computed by left_pool.cpp is still needed
};
}
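Before moving on to the backward pass, note what the forward pass above computes: a running maximum taken from the bottom row upward along the height dimension. A small PyTorch sketch of my own (not code from the repo), just to show the semantics:

import torch

def top_pool_py(x):
    # x: [n, c, h, w]; each output row holds the max over that row and every row below it
    out = x.clone()
    for i in range(x.size(2) - 2, -1, -1):   # from the second-to-last row up to row 0
        out[:, :, i] = torch.max(out[:, :, i], out[:, :, i + 1])
    return out

For example, top_pool_py(torch.arange(12.).view(1, 1, 4, 3)) propagates each column's largest value upward, which is exactly what the C++ loop does with at::max_out.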
std::vector<at::Tensor> top_pool_backward( // backward pass
at::Tensor input, // input tensor
at::Tensor grad_output // incoming gradient
) {
auto output = at::zeros_like(input); // output gradient, initialized to zeros
int32_t batch = input.size(0); // batch size
int32_t channel = input.size(1); // number of channels
int32_t height = input.size(2); // height
int32_t width = input.size(3); // width
auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width}); // running max values, float
auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, width}); // row indices of the running max, long
auto input_temp = input.select(2, height - 1); // select the last row
max_val.copy_(input_temp);
// Copies the elements from src into self tensor and returns self.
max_ind.fill_(height - 1);
// Fills self tensor with the specified value, i.e. the index of the last row
auto output_temp = output.select(2, height - 1); // select the last row
auto grad_output_temp = grad_output.select(2, height - 1); // select the last row
output_temp.copy_(grad_output_temp);
// Copies the elements from src into self tensor and returns self.
auto un_max_ind = max_ind.unsqueeze(2);
// Returns a new tensor with a dimension of size one inserted at the specified position.
auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, width});
auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
for (int32_t ind = 1; ind < height; ++ind) {
input_temp = input.select(2, height - ind - 1);
// starting from the second-to-last row
at::gt_out(gt_mask, input_temp, max_val);
// https://pytorch.org/docs/stable/torch.html#torch.gt
// input_temp is the current row (starting from the second-to-last)
// max_val holds the running max of the rows below it (initially the last row)
at::masked_select_out(max_temp, input_temp, gt_mask);
// https://pytorch.org/docs/stable/torch.html#torch.masked_select
// Returns a new 1-D tensor which indexes the input tensor according to the binary mask mask which is a ByteTensor.
// take the values of input_temp where gt_mask is set and write them into max_temp
max_val.masked_scatter_(gt_mask, max_temp);
// https://pytorch.org/docs/stable/tensors.html#torch.Tensor.masked_scatter_
// Copies elements from source into self tensor at positions where the mask is one.
max_ind.masked_fill_(gt_mask, height - ind - 1);
// https://pytorch.org/docs/stable/tensors.html#torch.Tensor.masked_fill_
// Fills elements of self tensor with value where mask is one.
// where gt_mask is 1, write height - ind - 1, i.e. the index of the current row
grad_output_temp = grad_output.select(2, height - ind - 1).unsqueeze(2);
output.scatter_add_(2, un_max_ind, grad_output_temp);
// https://pytorch.org/docs/stable/tensors.html#torch.Tensor.scatter_add_
// scatter_add_(dim, index, other) → Tensor
// the key step: the gradient of the current row is added onto the row where the running maximum came from
}
return {
output
};
}
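The backward pass mirrors that scan: for each column it tracks which row currently holds the running maximum (max_val / max_ind) and uses scatter_add_ to route the incoming gradient of every row onto that argmax row. A rough PyTorch equivalent, again my own sketch rather than the repo's code:

import torch

def top_pool_backward_py(x, grad_out):
    # x, grad_out: [n, c, h, w]; returns the gradient w.r.t. x
    grad_in = torch.zeros_like(x)
    h = x.size(2)
    max_val = x[:, :, h - 1].clone()                              # running max per column
    max_ind = torch.full_like(max_val, h - 1, dtype=torch.long)   # row index of that max
    grad_in[:, :, h - 1] = grad_out[:, :, h - 1]                  # the bottom row keeps its own gradient
    for i in range(h - 2, -1, -1):                                # scan upward
        beats = x[:, :, i] > max_val                              # where the current row takes over
        max_val = torch.where(beats, x[:, :, i], max_val)
        max_ind = torch.where(beats, torch.full_like(max_ind, i), max_ind)
        # add this row's incoming gradient onto the row that produced the forward maximum
        grad_in.scatter_add_(2, max_ind.unsqueeze(2), grad_out[:, :, i].unsqueeze(2))
    return grad_in

The update order matters: max_ind is refreshed before the scatter, so when the current row itself is the new maximum its gradient lands on its own row, matching the masked_fill_ followed by scatter_add_ in the C++ above.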
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { // later on the Python side you import the module named TORCH_EXTENSION_NAME
m.def(
"forward", &top_pool_forward, "Top Pool Forward", // TORCH_EXTENSION_NAME.forward
py::call_guard<py::gil_scoped_release>()
);
m.def(
"backward", &top_pool_backward, "Top Pool Backward", // TORCH_EXTENSION_NAME.backward
py::call_guard<py::gil_scoped_release>()
);
}
I could not find at::gt_out in the PyTorch Python docs at first, which is where I got stuck; it is just the out-variant of torch.gt, an element-wise greater-than whose byte result is written into the preallocated gt_mask instead of a newly allocated tensor, so there is no real need to dig further into the C++ here.
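To tie this back to corner_pool: for the top-left corner the top-pooled and left-pooled maps are simply added element-wise (and likewise BottomPool plus RightPool for the bottom-right corner). A hypothetical sketch of that combination, reusing top_pool_py from the sketch above; the branch widths and names are illustrative, not the repo's exact module:

import torch
import torch.nn as nn

def left_pool_py(x):
    # same idea as top_pool_py, but a running max taken from the rightmost column leftward
    out = x.clone()
    for j in range(x.size(3) - 2, -1, -1):
        out[:, :, :, j] = torch.max(out[:, :, :, j], out[:, :, :, j + 1])
    return out

class TopLeftPoolSketch(nn.Module):
    # two conv branches feed the two directional pools; the pooled maps are summed
    def __init__(self, dim=256, mid=128):
        super().__init__()
        self.p1 = nn.Conv2d(dim, mid, 3, padding=1)   # branch feeding TopPool
        self.p2 = nn.Conv2d(dim, mid, 3, padding=1)   # branch feeding LeftPool

    def forward(self, x):
        return top_pool_py(self.p1(x)) + left_pool_py(self.p2(x))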