代码:https://github.com/lufficc/SSD
解读:https://blog.csdn.net/qq_34784753/article/details/78889206
https://www.bbsmax.com/A/gGdX0Vepd4/
https://blog.csdn.net/u014380165/article/details/72824889/
空洞卷积:https://blog.csdn.net/hiudawn/article/details/84500648
代码解读:
# the SSD model will output the x, y, width, height parameters for all default boxes
prior_box.py 根据feature map大小生成先验anchor box
class PriorBox(nn.Module):
    """Generate the fixed set of SSD prior (default/anchor) boxes.

    The following settings are read from ``cfg``:

    * ``cfg.INPUT.IMAGE_SIZE``: input image size, used to normalise sizes.
    * ``cfg.MODEL.PRIORS.FEATURE_MAPS``: side length ``f`` of each square
      feature map; one prior group is emitted per cell of the ``f x f`` grid.
    * ``cfg.MODEL.PRIORS.MIN_SIZES`` / ``MAX_SIZES``: per-level box sizes, in
      the same units as ``IMAGE_SIZE``.
    * ``cfg.MODEL.PRIORS.STRIDES``: per-level stride; ``IMAGE_SIZE / stride``
      is the divisor that maps a cell index to a relative centre.
    * ``cfg.MODEL.PRIORS.ASPECT_RATIOS``: per-level list of extra aspect
      ratios; each ratio contributes two boxes (the ratio and its inverse).
    * ``cfg.MODEL.PRIORS.CLIP``: when truthy, clamp every value to [0, 1].
    """

    def __init__(self, cfg):
        super(PriorBox, self).__init__()
        self.image_size = cfg.INPUT.IMAGE_SIZE
        prior_config = cfg.MODEL.PRIORS
        self.feature_maps = prior_config.FEATURE_MAPS
        self.min_sizes = prior_config.MIN_SIZES
        self.max_sizes = prior_config.MAX_SIZES
        self.strides = prior_config.STRIDES
        self.aspect_ratios = prior_config.ASPECT_RATIOS
        self.clip = prior_config.CLIP

    def forward(self):
        """Generate SSD prior boxes.

        Each feature-map cell yields ``2 + 2 * len(aspect_ratios[k])`` boxes,
        in this order: the small square, the big square, then for every
        aspect ratio the pair (wide box, tall box).

        Returns:
            priors (num_priors, 4): prior boxes as
                ``[[center_x, center_y, w, h]]``. All values are relative to
                the image size. The result has shape ``(0, 4)`` when no
                feature maps are configured.
        """
        priors = []
        for k, f in enumerate(self.feature_maps):
            scale = self.image_size / self.strides[k]

            # Everything below depends only on the level k, not on the cell,
            # so it is computed once per level instead of once per cell.
            small = self.min_sizes[k] / self.image_size
            big = sqrt(self.min_sizes[k] * self.max_sizes[k]) / self.image_size
            sqrt_ratios = [sqrt(ratio) for ratio in self.aspect_ratios[k]]

            for i, j in product(range(f), repeat=2):
                # Cell centre, relative to the image size.
                cx = (j + 0.5) / scale
                cy = (i + 0.5) / scale

                # Small square box.
                priors.append([cx, cy, small, small])
                # Big square box (geometric mean of min and max size).
                priors.append([cx, cy, big, big])
                # Small box stretched to each aspect ratio and its inverse.
                for ratio in sqrt_ratios:
                    priors.append([cx, cy, small * ratio, small / ratio])
                    priors.append([cx, cy, small / ratio, small * ratio])

        # reshape keeps the documented (num_priors, 4) shape even when the
        # list is empty (a bare conversion would give shape (0,)).
        priors = torch.tensor(priors).reshape(-1, 4)
        if self.clip:
            priors.clamp_(max=1, min=0)
        return priors
box_utils.py
回归的是真实框(ground truth)相对于 default box 的中心点偏移量,以及宽度和高度相对于 default box 的对数缩放量。
def convert_locations_to_boxes(locations, priors, center_variance,
                               size_variance):
    """Decode SSD regression outputs into center-form boxes.

    The regression targets are encoded as

        location_center * center_variance
            = (box_center - prior_center) / prior_size
        exp(location_size * size_variance) = box_size / prior_size

    and this function applies the inverse mapping.

    Args:
        locations (batch_size, num_priors, 4): the regression output of SSD.
        priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes
            in center form; the first two entries of the last axis are the
            center and the last two are the size.
        center_variance: a float used to change the scale of the center.
        size_variance: a float used to change the scale of the size.

    Returns:
        boxes: tensor with the same leading dimensions as ``locations`` and
            a last axis of ``[center_0, center_1, size_0, size_1]``, i.e. in
            the same component order as ``priors``. Values are in the same
            (relative) units as ``priors``.
    """
    # priors can have one dimension less; add a broadcastable batch axis.
    if priors.dim() + 1 == locations.dim():
        priors = priors.unsqueeze(0)

    prior_centers = priors[..., :2]
    prior_sizes = priors[..., 2:]

    centers = locations[..., :2] * center_variance * prior_sizes + prior_centers
    sizes = torch.exp(locations[..., 2:] * size_variance) * prior_sizes
    return torch.cat([centers, sizes], dim=-1)
分配每一个先验框对应的target
这里需要注意的是我们始终要学习的是default anchors 的偏移值
而此偏移值是相对于这个default box所匹配的ground truth boxes来说的
因此我们对数据集原有的category labels 和 boxes 进一步加工使得这些标签与default box匹配以便计算真正的loss
因此在SSD中,真正的标签是相对于default boxes来说的,而default boxes从头到尾都是不变的
def assign_priors(gt_boxes, gt_labels, corner_form_priors,
                  iou_threshold):
    """Assign ground truth boxes and targets to priors.

    Every prior is matched to the ground truth box it overlaps most. In
    addition, each ground truth box is force-matched to the prior that
    overlaps it most, so that no ground truth box is left without a prior.
    Priors whose best overlap is below ``iou_threshold`` (and that were not
    force-matched) are labelled as background (0).

    Args:
        gt_boxes (num_targets, 4): ground truth boxes.
            NOTE(review): presumably corner form, like the priors -- confirm
            against ``iou_of``.
        gt_labels (num_targets): labels of targets.
        corner_form_priors (num_priors, 4): corner form priors.
        iou_threshold: minimum IoU for a prior to keep its matched label.

    Returns:
        boxes (num_priors, 4): the ground truth box matched to each prior.
        labels (num_priors): the label for each prior; 0 means background.
    """
    # size: num_priors x num_targets (per the broadcasting of the two inputs)
    ious = iou_of(gt_boxes.unsqueeze(0), corner_form_priors.unsqueeze(1))

    # size: num_priors -- best matching target (IoU and index) for each prior
    best_target_per_prior, best_target_per_prior_index = ious.max(1)
    # size: num_targets -- best matching prior for each target; only the
    # index is needed, the IoU value itself is unused.
    _, best_prior_per_target_index = ious.max(0)

    # Force each target's best prior to point back at that target. Kept as an
    # explicit loop so that, if two targets share a prior, the later target
    # deterministically wins.
    for target_index, prior_index in enumerate(best_prior_per_target_index):
        best_target_per_prior_index[prior_index] = target_index

    # 2.0 is larger than any IoU, so force-matched priors always pass the
    # threshold below and every target keeps a prior.
    best_target_per_prior.index_fill_(0, best_prior_per_target_index, 2)

    # size: num_priors -- the category and box each prior should learn
    labels = gt_labels[best_target_per_prior_index]
    labels[best_target_per_prior < iou_threshold] = 0  # the background id
    boxes = gt_boxes[best_target_per_prior_index]
    return boxes, labels
multibox_loss.py
计算分类的 cross entropy loss 和回归的 Smooth L1 loss
class MultiBoxLoss(nn.Module):
    """SSD MultiBox loss: classification loss plus Smooth L1 box regression."""

    def __init__(self, neg_pos_ratio):
        """Implement SSD MultiBox Loss.

        Basically, MultiBox loss combines classification loss
        and Smooth L1 regression loss.

        Args:
            neg_pos_ratio: ratio passed to hard negative mining.
                NOTE(review): presumably the number of negatives kept per
                positive -- confirm in ``box_utils.hard_negative_mining``.
        """
        super(MultiBoxLoss, self).__init__()
        self.neg_pos_ratio = neg_pos_ratio

    def forward(self, confidence, predicted_locations, labels, gt_locations):
        """Compute classification loss and smooth l1 loss.

        Args:
            confidence (batch_size, num_priors, num_classes): class predictions.
            predicted_locations (batch_size, num_priors, 4): predicted locations.
            labels (batch_size, num_priors): real labels of all the priors;
                values greater than 0 mark positive priors.
            gt_locations (batch_size, num_priors, 4): real boxes corresponding
                to all the priors.

        Returns:
            A tuple ``(regression_loss, classification_loss)``: the summed
            Smooth L1 loss over positive priors and the summed cross entropy
            over the mined priors, each divided by the number of positive
            priors. When there are no positive priors the divisor is 1, so
            the losses stay finite instead of becoming NaN/inf.
        """
        num_classes = confidence.size(2)
        with torch.no_grad():
            # Negative log-probability of class 0 for every prior; this score
            # is what hard negative mining ranks the priors by.
            loss = -F.log_softmax(confidence, dim=2)[:, :, 0]
            mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio)

        # Classification loss over the priors selected by the mining mask.
        confidence = confidence[mask, :]
        classification_loss = F.cross_entropy(
            confidence.view(-1, num_classes), labels[mask], reduction='sum')

        # Regression loss over positive priors only.
        pos_mask = labels > 0
        predicted_locations = predicted_locations[pos_mask, :].view(-1, 4)
        gt_locations = gt_locations[pos_mask, :].view(-1, 4)
        smooth_l1_loss = F.smooth_l1_loss(
            predicted_locations, gt_locations, reduction='sum')

        # Guard against a batch with no positive priors (division by zero).
        num_pos = max(gt_locations.size(0), 1)
        return smooth_l1_loss / num_pos, classification_loss / num_pos