pytorch SSD代码解读(2)

一、预测过程

letterbox_image 为了防止图片失真,不进行简单的 resize,而是先按原始宽高比对图片进行等比例缩放(使用双三次插值),再创建一个 300*300 的灰色图片,把缩放后的图片粘贴到灰色图片的中央,相当于在边缘加上灰条。

def letterbox_image(image, size):
    """Fit *image* inside *size* while keeping its aspect ratio.

    The image is scaled by the largest factor for which both sides still fit
    in the target, then pasted centred onto a grey ``(128, 128, 128)`` RGB
    canvas of exactly *size*. Whatever area the scaled image does not cover
    remains as grey bars.

    Args:
        image: Source image exposing ``size`` as ``(width, height)`` and a
            ``resize`` method (a PIL image is expected).
        size: Target ``(width, height)`` of the returned canvas.

    Returns:
        A new RGB image with dimensions *size*.
    """
    src_w, src_h = image.size
    dst_w, dst_h = size

    # The smaller of the two ratios guarantees neither side overflows.
    ratio = min(dst_w / src_w, dst_h / src_h)
    fitted_w, fitted_h = int(src_w * ratio), int(src_h * ratio)

    # Bicubic interpolation for the aspect-preserving resize.
    fitted = image.resize((fitted_w, fitted_h), Image.BICUBIC)

    # Grey canvas of the requested size; the resized image goes in its centre.
    canvas = Image.new('RGB', size, (128, 128, 128))
    top_left = ((dst_w - fitted_w) // 2, (dst_h - fitted_h) // 2)
    canvas.paste(fitted, top_left)
    return canvas

将预测的框变成真实图片的框

def encode(matched, priors, variances):
    """Encode matched boxes as offsets relative to their prior boxes.

    ``matched`` is treated as corner-form boxes (columns 0-1 are one corner,
    columns 2-3 the opposite corner), and ``priors`` as centre-form boxes
    (columns 0-1 are the centre, columns 2-3 the width/height).

    Args:
        matched: Tensor of shape [N, 4] with the box matched to each prior.
        priors: Tensor of shape [N, 4] with the prior boxes.
        variances: Sequence of two scale factors; ``variances[0]`` scales the
            centre offsets and ``variances[1]`` scales the log size ratios.

    Returns:
        Tensor of shape [N, 4]: centre offsets followed by log size ratios.
    """
    corner_min = matched[:, :2]
    corner_max = matched[:, 2:]
    prior_center = priors[:, :2]
    prior_size = priors[:, 2:]

    # Distance between the box centre and the prior centre, normalised by
    # the prior size and the first variance (in place on the fresh tensor).
    center_offset = (corner_min + corner_max) / 2 - prior_center
    center_offset.div_(variances[0] * prior_size)

    # Log of the box-size / prior-size ratio, scaled by the second variance.
    size_ratio = (corner_max - corner_min) / prior_size
    log_size = torch.log(size_ratio) / variances[1]

    return torch.cat([center_offset, log_size], 1)

计算所有的先验框和真实框的重合程度

def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx):
    """Match every prior box to a ground-truth box and store the targets.

    Each prior is assigned the ground-truth box it overlaps most. In
    addition, every ground-truth box is forced onto the prior that overlaps
    it best, so that no ground-truth box is left without a prior. Priors
    whose best overlap is below ``threshold`` are labelled 0 (background).
    Results are written into ``loc_t[idx]`` and ``conf_t[idx]``; nothing is
    returned.

    Args:
        threshold: Minimum overlap for a prior to keep a non-background label.
        truths: Ground-truth boxes, indexed along dim 0 by box.
        priors: Prior boxes; converted with ``point_form`` before the overlap
            computation and passed unchanged to ``encode``.
        variances: Passed through to ``encode``.
        labels: Class label per ground-truth box (shifted by +1 so that 0 can
            mean background).
        loc_t: Output container receiving the encoded offsets at ``idx``.
        conf_t: Output container receiving the class labels at ``idx``.
        idx: Index into ``loc_t`` / ``conf_t`` to write to.
    """
    # Overlap between every ground-truth box and every prior; per the
    # original note the layout is [num_truths, num_priors].
    iou = jaccard(truths, point_form(priors))

    # For each ground-truth box, the prior overlapping it most: [num_truths].
    _, prior_for_truth = iou.max(1)

    # For each prior, the ground-truth box overlapping it most: [num_priors].
    best_truth_overlap, best_truth_idx = iou.max(0)

    # Priors that are the best match of some ground-truth box get overlap 2,
    # which keeps them above any threshold.
    best_truth_overlap.index_fill_(0, prior_for_truth, 2)

    # Point those priors at the ground-truth box that claimed them. Done
    # sequentially so that a later ground-truth box wins on a shared prior.
    for truth_j, prior_i in enumerate(prior_for_truth):
        best_truth_idx[prior_i] = truth_j

    # Gather the assigned box and label for every prior.
    matched_boxes = truths[best_truth_idx]    # Shape: [num_priors,4]
    prior_labels = labels[best_truth_idx] + 1  # Shape: [num_priors]

    # Anything overlapping less than the threshold is background.
    is_background = best_truth_overlap < threshold
    prior_labels[is_background] = 0

    # Encoded offsets for the network to learn, then store both targets.
    loc_t[idx] = encode(matched_boxes, priors, variances)
    conf_t[idx] = prior_labels

检测图片

    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        # letterbox_image为了防止失帧,不是简单resize,而是在边缘加上灰条
        crop_img = np.array(letterbox_image(image, (self.model_image_size[0],self.model_image_size[1])))
        photo = np.array(crop_img,dtype = np.float64)
        # 图片预处理,归一化
        with torch.no_grad():
            # 从每个图像通道中减去给定的均值,torch中是BGR,transpose转换一下
            photo = torch.from_numpy(np.expand_dims(np.transpose(crop_img-MEANS,(2,0,1)),0)).type(torch.FloatTensor)
            if self.cuda:
                photo = photo.cuda()
            preds = self.net(photo)  # 把photo传入net中得到预测结果
        
        top_conf = []
        top_label = []
        top_bboxes = []
        for i in range(preds.size(1)):  # pred.size = (1,21,200,5),遍历21个类
            j = 0
            while preds[0, i,
  • 2
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值