之前看过yolo带角度的定位项目:rotatable_yolov3、rotate-yolov3-U、ryolov3research-pytorch-master。
这些虽好,但是并不适合我。
然后看到了yolo定位任意四边形的项目:YOLOv3-quadrangle、Yolo-ArbPolygon。
目前只跑通了YOLOv3-quadrangle,Yolo-ArbPolygon还没有跑通。
其中YOLOv3-quadrangle的关键点是损失函数,对应的forward代码如下:
def forward(self, p, targets=None, requestPrecision=False):
    """Decode quadrangle predictions and, when targets are given, compute the loss.

    The raw input ``p`` is reshaped to ``(bs, nA, nG, nG, bbox_attrs)``. Along
    the last axis, channels 0-7 are the (x, y) offsets of four corner points,
    channel 8 is the objectness logit and channels 9+ are the class logits.

    Args:
        p: Raw layer input; viewed as ``(bs, nA, bbox_attrs, nG, nG)``.
        targets: Per-image ground truth (one entry per image), or ``None``
            for inference.
        requestPrecision: When true, corner predictions are decoded (without
            gradient) and handed to ``build_targets`` together with this flag.

    Returns:
        Inference (``targets is None``): a tensor of shape
        ``(bs, nA * nG * nG, 8 + 1 + nC)`` holding corner coordinates scaled
        by the stride, sigmoid objectness and raw class logits.

        Training: the tuple ``(loss, loss.item(), lconf.item(), lcls.item(),
        nT, TP, FP, FPe, FN, TC)`` where ``TP``/``FP``/``FN``/``TC`` are passed
        through from ``build_targets`` and ``FPe`` counts, per class,
        unassigned anchors whose objectness exceeds 0.5.
    """
    device = p.device
    bs = p.shape[0]  # batch size
    nG = p.shape[2]  # number of grid points per side
    stride = self.img_dim / nG

    # Keep the cached grid/anchor tensors on the same device as the input.
    # `.to(device)` (rather than `.cuda()`) targets the input's own device.
    if self.grid_x.device != device:
        self.grid_x, self.grid_y = self.grid_x.to(device), self.grid_y.to(device)
        self.anchor_w, self.anchor_h = self.anchor_w.to(device), self.anchor_h.to(device)

    # e.g. p.view(1, 30, 13, 13) --> (1, 3, 13, 13, 10)
    # (bs, anchors, grid, grid, 8 corner coords + conf + classes)
    p = p.view(bs, self.nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()

    pred_conf = p[..., 8]  # objectness logit
    pred_cls = p[..., 9:]  # class logits

    # Slice the cached grids to the current feature-map size in BOTH branches,
    # so a grid built for a larger map still broadcasts correctly.
    gx = self.grid_x[:, :, :nG, :nG].unsqueeze(-1)
    gy = self.grid_y[:, :, :nG, :nG].unsqueeze(-1)

    # Zero-initialised so build_targets never sees uninitialised memory.
    pred_boxes = torch.zeros(bs, self.nA, nG, nG, 8, dtype=torch.float32, device=device)

    def decode_corners(raw):
        # Even channels are x offsets, odd channels are y offsets.
        pred_boxes[..., 0:8:2] = raw[..., 0:8:2] + gx
        pred_boxes[..., 1:8:2] = raw[..., 1:8:2] + gy

    # Inference
    if targets is None:
        decode_corners(p)
        return torch.cat((pred_boxes.view(bs, -1, 8) * stride,
                          torch.sigmoid(pred_conf.view(bs, -1, 1)),
                          pred_cls.view(bs, -1, self.nC)), -1)

    # Training
    smooth_l1 = nn.SmoothL1Loss()
    bce_with_logits = nn.BCEWithLogitsLoss()
    cross_entropy = nn.CrossEntropyLoss()

    if requestPrecision:
        decode_corners(p.detach())

    t1_x, t1_y, t2_x, t2_y, t3_x, t3_y, t4_x, t4_y, mask, tcls, TP, FP, FN, TC = \
        build_targets(pred_boxes, pred_conf, pred_cls, targets, self.scaled_anchors, self.nA, self.nC, nG,
                      requestPrecision)

    # Boolean mask is required for `~mask` to mean logical negation; convert
    # before any indexing. Presumably mask has shape (bs, nA, nG, nG) -- it is
    # used to index tensors of that shape below.
    mask = mask.bool()
    tcls = tcls[mask].to(device)
    mask = mask.to(device)
    corner_targets = [t.to(device) for t in (t1_x, t1_y, t2_x, t2_y, t3_x, t3_y, t4_x, t4_y)]

    # Compute losses
    nT = sum(len(x) for x in targets)  # number of targets
    nM = mask.sum().float()  # number of anchors assigned to targets
    nB = len(targets)  # batch size
    k = nM / nB
    if nM > 0:
        # Accumulate in the order x1, y1, ..., x4, y4, then conf, then cls.
        loss = 0
        for channel, target in enumerate(corner_targets):
            loss = loss + k * smooth_l1(p[..., channel][mask], target[mask]) / 8
        lconf = (k * 10) * bce_with_logits(pred_conf, mask.float())
        lcls = (k / self.nC) * cross_entropy(pred_cls[mask], torch.argmax(tcls, 1))
        loss = loss + lconf + lcls
    else:
        # NOTE(review): these zeros are not attached to the autograd graph,
        # so callers must not rely on backward() through this branch alone.
        lconf = torch.zeros(1, device=device)
        lcls = torch.zeros(1, device=device)
        loss = torch.zeros(1, device=device)

    # Sum false positives from unassigned anchors
    unassigned = ~mask
    confident = torch.sigmoid(pred_conf[unassigned]) > 0.5
    if confident.sum() > 0:
        fp_classes = torch.argmax(pred_cls[unassigned][confident], 1)
        FPe = torch.bincount(fp_classes, minlength=self.nC).float().cpu()
    else:
        FPe = torch.zeros(self.nC)

    return loss, loss.item(), lconf.item(), lcls.item(), nT, TP, FP, FPe, FN, TC