1. Get the three tensor outputs from the network
hm (heatmap): the per-class center heatmap; after getting the network output, apply sigmoid() to map its values into [0, 1].
wh (width/height): the regressed box width and height; here they are regressed at the network-input scale.
reg (regression): the regressed center-point offset; here too the offsets are at the network-input scale.
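To make the shapes concrete, here is a minimal sketch with dummy tensors, assuming a 512x512 input, a 20-class detector, and an output stride of 4 (illustrative assumptions, not fixed by anything in this post):

#include <torch/torch.h>

// Illustrative dummy tensors standing in for the real network outputs.
torch::Tensor hm  = torch::rand({1, 20, 128, 128}).sigmoid();  // one heatmap channel per class, in [0, 1]
torch::Tensor wh  = torch::rand({1, 2, 128, 128});             // width/height per location
torch::Tensor reg = torch::rand({1, 2, 128, 128});             // x/y center offset per location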
2. NMS. This step currently differs from the NMS used by non-anchor-free (anchor-based) methods. My C++ code is below:
torch::Tensor _nms(torch::Tensor heat, int64_t kernel = 3)
{
    // "same" padding so the pooled map keeps the input's spatial size
    int64_t pad = (kernel - 1) / 2;
    // max pool with stride 1: each position takes the max of its kernel*kernel neighborhood
    torch::Tensor hmax = at::max_pool2d(heat, {kernel, kernel}, {1, 1}, {pad, pad});
    // keep only positions that equal their local maximum
    torch::Tensor keep = (hmax == heat).toType(torch::kFloat32);
    return heat * keep;
}
Run max_pool2d over the heatmap with stride 1 so the output keeps the feature map's original size, and keep only the values that are unchanged from the original map.
In short, the purpose of this operation is: for each point on the feature map, keep it if it is the maximum of its neighborhood (by default the 3x3 window of 9 points including itself); otherwise set it to 0.
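A quick sanity check of this behavior on a toy heatmap (a hypothetical snippet; only the single local maximum survives):

// 1x1x3x3 heatmap with one clear peak in the center.
torch::Tensor toy = torch::tensor(
    {0.1f, 0.2f, 0.1f,
     0.2f, 0.9f, 0.2f,
     0.1f, 0.2f, 0.1f}).view({1, 1, 3, 3});
torch::Tensor peaks = _nms(toy);
// Only the 0.9 at the center remains; every non-maximum becomes 0.
std::cout << peaks << std::endl;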
3. top-k: split the heatmap scores by class and take the top K scores.
torch::Tensor _gather_feat(torch::Tensor feat, torch::Tensor ind)
{
    // feat: (batch, N, dim), ind: (batch, K)
    int64_t dim = feat.size(2);
    // expand ind to (batch, K, dim) so it indexes every channel
    ind = ind.unsqueeze(2).expand({ind.size(0), ind.size(1), dim});
    // gather along the N dimension -> (batch, K, dim)
    feat = feat.gather(1, ind);
    return feat;
}
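A minimal illustration of the gather semantics, with hypothetical shapes:

// feat: (1, 5, 2) -- 5 locations, 2 channels; rows are [0,1],[2,3],[4,5],[6,7],[8,9]
torch::Tensor feat = torch::arange(10, torch::kFloat32).view({1, 5, 2});
torch::Tensor ind  = torch::tensor({4, 0}, torch::kInt64).view({1, 2});
torch::Tensor out  = _gather_feat(feat, ind);  // (1, 2, 2): rows [8,9] and [0,1]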
void _topk(const torch::Tensor &scores,
           torch::Tensor &topk_score,
           torch::Tensor &topk_inds,
           torch::Tensor &topk_clses,
           torch::Tensor &topk_ys,
           torch::Tensor &topk_xs,
           int64_t K = 100)
{
    // feature map dimensions
    int64_t batch = scores.sizes()[0];
    int64_t cat = scores.sizes()[1];
    int64_t height = scores.sizes()[2];
    int64_t width = scores.sizes()[3];
    // e.g. a heatmap of size (1, 20, 128, 128) corresponds to a 20-class detector
    // first top-k: per class, over the flattened H*W positions
    std::tuple<torch::Tensor, torch::Tensor> topk_score_inds =
        torch::topk(scores.view({batch, cat, -1}), K);
    // with the default K and the shapes assumed above, the result is (1, 20, 100)
    torch::Tensor topk_scores = std::get<0>(topk_score_inds);
    topk_inds = std::get<1>(topk_score_inds);
    // the indices from torch::topk are positions in the flattened H*W map;
    // taking the remainder modulo the map area keeps them within a single class map
    topk_inds = topk_inds % (height * width);
    // recover the y (row) and x (column) coordinates from the flattened index;
    // the int cast truncates, which equals floor for these non-negative indices
    topk_ys = (topk_inds / width).toType(torch::kInt32).toType(torch::kFloat32);
    topk_xs = (topk_inds % width).toType(torch::kInt32).toType(torch::kFloat32);
    // second top-k: across classes, to pick the overall top K
    std::tuple<torch::Tensor, torch::Tensor> topk_score_ind =
        torch::topk(topk_scores.view({batch, -1}), K);
    topk_score = std::get<0>(topk_score_ind);
    torch::Tensor topk_ind = std::get<1>(topk_score_ind);
    // divide by K (not by the map area) to get the class id, because this top-k
    // ran on the flattened (num_classes, K) tensor: topk_scores is (1, 20, 100)
    topk_clses = (topk_ind / K).toType(torch::kInt32);
    // map back to positions on the original feature map: topk_ind indexes the
    // (num_classes*K) candidate list, not the feature map itself
    topk_inds = _gather_feat(topk_inds.view({batch, -1, 1}), topk_ind).view({batch, K});
    topk_ys = _gather_feat(topk_ys.view({batch, -1, 1}), topk_ind).view({batch, K});
    topk_xs = _gather_feat(topk_xs.view({batch, -1, 1}), topk_ind).view({batch, K});
}
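A hypothetical end-to-end call, just to make the output shapes concrete (assumes the functions above are in scope):

torch::Tensor heat = torch::rand({1, 20, 128, 128}).sigmoid();
heat = _nms(heat);
torch::Tensor sc, inds, cls, ys, xs;
_topk(heat, sc, inds, cls, ys, xs, 100);
// sc    : (1, 100)  top-K confidences across all classes
// inds  : (1, 100)  flattened positions on the 128x128 map
// cls   : (1, 100)  class id of each detection
// ys, xs: (1, 100)  integer center coordinates on the feature map (stored as float)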
4. The rest is straightforward. Since the top-K classes and scores are now in hand, use their indices to gather the corresponding entries from the other two outputs:
// (the misspelled name "tranpose" is kept as in the original CenterNet implementation)
torch::Tensor _tranpose_and_gather_feat(torch::Tensor feat, torch::Tensor ind)
{
    // (batch, C, H, W) -> (batch, H, W, C)
    feat = feat.permute({0, 2, 3, 1}).contiguous();
    // flatten the spatial dims: (batch, H*W, C)
    feat = feat.view({feat.size(0), -1, feat.size(3)});
    // pick the rows at the top-K positions: (batch, K, C)
    feat = _gather_feat(feat, ind);
    return feat;
}
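A quick shape trace of this helper under the running assumptions (2-channel wh head, 128x128 map, K = 100):

// (1, 2, 128, 128) --permute/view--> (1, 16384, 2) --gather--> (1, 100, 2)
torch::Tensor wh_topk = _tranpose_and_gather_feat(
    torch::rand({1, 2, 128, 128}),
    torch::randint(0, 128 * 128, {1, 100}, torch::kInt64));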
// main post-processing flow
// unpack the three network outputs
auto output = value.toTuple();
std::vector<c10::IValue> outputs = output->elements();
torch::Tensor hm = outputs[0].toTensor().sigmoid();
torch::Tensor wh = outputs[1].toTensor();
torch::Tensor reg = outputs[2].toTensor();
//nms
hm = _nms(hm);
//topk
torch::Tensor scores;
torch::Tensor topk_inds;
torch::Tensor topk_clses;
torch::Tensor topk_ys;
torch::Tensor topk_xs;
_topk(hm, scores, topk_inds, topk_clses, topk_ys, topk_xs);
// gather scores, classes, and boxes at the top-K positions
int64_t batch = 1;
int64_t K = 100;
// gather the regressed center offsets and refine the integer centers
reg = _tranpose_and_gather_feat(reg, topk_inds);
reg = reg.view({batch, K, 2});
torch::Tensor xs = topk_xs.view({batch, K, 1}) + reg.slice(2, 0, 1);
torch::Tensor ys = topk_ys.view({batch, K, 1}) + reg.slice(2, 1, 2);
// gather the regressed width/height at the same positions
wh = _tranpose_and_gather_feat(wh, topk_inds);
wh = wh.view({batch, K, 2});
torch::Tensor clses = topk_clses.view({batch, K, 1}).toType(torch::kFloat32);
scores = scores.view({batch, K, 1});
// centers plus/minus half extents -> (x1, y1, x2, y2)
std::vector<torch::Tensor> vec_tensor =
    {(xs - wh.slice(2, 0, 1) / 2),
     (ys - wh.slice(2, 1, 2) / 2),
     (xs + wh.slice(2, 0, 1) / 2),
     (ys + wh.slice(2, 1, 2) / 2)};
torch::Tensor bboxes = torch::cat(vec_tensor, 2);
torch::Tensor scores_cpu = scores.cpu();
torch::Tensor bboxes_cpu = bboxes.cpu();
torch::Tensor cls_cpu = clses.cpu();
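To consume these results, one would typically walk the K candidates and drop low-confidence ones. A minimal sketch, with a hypothetical threshold of 0.3:

float vis_thresh = 0.3f;  // hypothetical threshold, tune for your model
auto scores_a = scores_cpu.accessor<float, 3>();
auto bboxes_a = bboxes_cpu.accessor<float, 3>();
auto cls_a = cls_cpu.accessor<float, 3>();
for (int64_t i = 0; i < K; ++i) {
    if (scores_a[0][i][0] < vis_thresh)
        continue;
    float x1 = bboxes_a[0][i][0], y1 = bboxes_a[0][i][1];
    float x2 = bboxes_a[0][i][2], y2 = bboxes_a[0][i][3];
    int cls_id = static_cast<int>(cls_a[0][i][0]);
    // ... draw or store the detection (x1, y1, x2, y2, cls_id) ...
}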
5. The predictions obtained here are positions relative to the network-input size; you still need to scale them back to original-image coordinates according to your preprocessing.
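For example, if the preprocessing was a plain resize (no letterbox padding), the mapping back is just two scale factors. A sketch under that assumption, with placeholder sizes; if your boxes are still at feature-map resolution, multiply by the output stride first:

// placeholders: fill in with your real preprocessing sizes
int orig_w = 1920, orig_h = 1080;  // original image size (hypothetical)
int input_w = 512, input_h = 512;  // network input size (hypothetical)
float scale_x = static_cast<float>(orig_w) / input_w;
float scale_y = static_cast<float>(orig_h) / input_h;
bboxes_cpu.slice(2, 0, 1).mul_(scale_x);  // x1
bboxes_cpu.slice(2, 1, 2).mul_(scale_y);  // y1
bboxes_cpu.slice(2, 2, 3).mul_(scale_x);  // x2
bboxes_cpu.slice(2, 3, 4).mul_(scale_y);  // y2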