YOLOv3 的整体网络结构代码实现
损失函数
def get_loss(output,
             label_objectness,
             label_location,
             label_classification,
             scales,
             num_anchors=3,
             num_classes=7):
    """Compute the YOLOv3 loss for one output feature map.

    The loss is the sum of an objectness term, a box-location term and a
    classification term. Location and classification terms are only counted
    where ``label_objectness > 0``.

    Args:
        output: Raw head output. It is reshaped to
            ``[-1, num_anchors, num_classes + 5, H, W]`` where ``H`` and ``W``
            are ``output.shape[2]`` and ``output.shape[3]``.
        label_objectness: Objectness targets; entries greater than 0 mark
            positive samples. Assumed to be shaped ``[N, num_anchors, H, W]``
            so it lines up with the per-anchor predictions -- TODO confirm
            against the label generator.
        label_location: Location targets, indexed as
            ``label_location[:, :, k, :, :]`` for k = 0..3 (x, y, w, h).
        label_classification: Class targets compared element-wise with the
            ``num_classes`` class logits of every anchor.
        scales: Weight multiplied into the location loss; must broadcast
            against a ``[N, num_anchors, H, W]`` tensor.
        num_anchors: Number of anchors predicted per cell.
        num_classes: Number of object classes.

    Returns:
        The mean over samples of the per-sample summed loss.
    """
    # [N, C, H, W] -> [N, num_anchors, num_classes + 5, H, W]
    reshaped_output = paddle.reshape(
        output,
        [-1, num_anchors, num_classes + 5, output.shape[2], output.shape[3]])

    # Objectness logit is channel 4 only. Selecting a single index (not the
    # slice 4:) drops that axis, so the result lines up with label_objectness
    # and with the other per-anchor loss terms.
    pred_objectness = reshaped_output[:, :, 4, :, :]
    loss_objectness = F.binary_cross_entropy_with_logits(
        pred_objectness, label_objectness, reduction='none')

    # Mask that is 1.0 at positive samples and 0.0 elsewhere; it is a constant
    # with respect to the network, so no gradient flows through it.
    pos_objectness = label_objectness > 0
    pos_samples = paddle.cast(pos_objectness, 'float32')
    pos_samples.stop_gradient = True

    # Predicted box parameters (channels 0..3).
    tx = reshaped_output[:, :, 0, :, :]
    ty = reshaped_output[:, :, 1, :, :]
    tw = reshaped_output[:, :, 2, :, :]
    th = reshaped_output[:, :, 3, :, :]

    # Matching box targets.
    dx_label = label_location[:, :, 0, :, :]
    dy_label = label_location[:, :, 1, :, :]
    tw_label = label_location[:, :, 2, :, :]
    th_label = label_location[:, :, 3, :, :]

    # x / y use sigmoid cross-entropy, w / h use an L1 distance.
    loss_location_x = F.binary_cross_entropy_with_logits(
        tx, dx_label, reduction="none")
    loss_location_y = F.binary_cross_entropy_with_logits(
        ty, dy_label, reduction="none")
    loss_location_w = paddle.abs(tw - tw_label)
    loss_location_h = paddle.abs(th - th_label)
    loss_location = (loss_location_x + loss_location_y
                     + loss_location_h + loss_location_w)

    # Apply the per-box weight, then keep positive samples only.
    loss_location = loss_location * scales
    loss_location = loss_location * pos_samples

    # Class logits occupy channels 5 .. 5 + num_classes.
    pred_classification = reshaped_output[:, :, 5:5 + num_classes, :, :]
    loss_classification = F.binary_cross_entropy_with_logits(
        pred_classification, label_classification, reduction="none")
    # Collapse the class axis so the term is per anchor and cell.
    loss_classification = paddle.sum(loss_classification, axis=2)
    loss_classification = loss_classification * pos_samples

    total_loss = loss_objectness + loss_location + loss_classification
    # Sum over anchors and spatial positions, then average over the batch.
    total_loss = paddle.sum(total_loss, axis=[1, 2, 3])
    total_loss = paddle.mean(total_loss)
    return total_loss
将特征图放大（上采样）
class Upsample(paddle.nn.Layer):
    """Enlarge its input by a fixed factor using nearest-neighbour interpolation.

    Args:
        scale: Factor passed to ``interpolate`` as ``scale_factor``
            (default 2).
    """

    def __init__(self, scale=2):
        super(Upsample, self).__init__()
        self.scale = scale

    def forward(self, inputs):
        """Return ``inputs`` resized by ``self.scale``.

        NOTE(review): presumably ``inputs`` is an NCHW feature map -- confirm
        against callers.
        """
        # scale_factor alone determines the output size, so no explicit
        # target shape has to be derived from the input shape here.
        return paddle.nn.functional.interpolate(
            x=inputs, scale_factor=self.scale, mode='NEAREST')
整体结构。
这里的损失函数是：先将骨干网络输出的 C0、C1、C2 转化成 P0、P1、P2，再将这三个 P 分别与 gtbox（即真实框）计算损失值，最后将三个损失值相加。这里直接调用了 API（paddle.vision.ops.yolo_loss），其原理相当于将上面的损失函数使用三次。
class YOLOV3(paddle.nn.Layer):
    """YOLOv3 detector: a backbone followed by three detection heads.

    For each of the three feature levels the model owns a detection block,
    a 1x1 output convolution and (for the first two levels) a route
    convolution whose upsampled result is concatenated with the next level.

    Args:
        num_classes: Number of object classes (default 7).
    """

    def __init__(self, num_classes=7):
        super(YOLOV3, self).__init__()
        self.num_classes = num_classes
        # Backbone; calling it must yield the per-level feature maps that
        # forward() iterates over.
        self.block = DarkNet53_conv_body()
        self.block_outputs = []
        self.yolo_blocks = []
        self.route_blocks_2 = []

        # Every level predicts 3 anchors, each with 4 box values,
        # 1 objectness score and num_classes class scores.
        num_filters = 3 * (self.num_classes + 5)

        for i in range(3):
            level_channels = 512 // (2 ** i)

            # Levels after the first also receive the upsampled route, hence
            # the extra level_channels input channels.
            if i == 0:
                detection_in = level_channels * 2
            else:
                detection_in = level_channels * 2 + level_channels
            yolo_block = self.add_sublayer(
                "yolo_detection_block_%d" % (i),
                YoloDetectionBlock(ch_in=detection_in,
                                   ch_out=level_channels))
            self.yolo_blocks.append(yolo_block)

            # The output conv needs its own sublayer name: reusing the
            # detection block's name would replace that block's registration
            # and drop its parameters from parameters()/state_dict().
            block_out = self.add_sublayer(
                "block_out_%d" % (i),
                paddle.nn.Conv2D(
                    in_channels=level_channels * 2,
                    out_channels=num_filters,
                    kernel_size=1,
                    stride=1,
                    weight_attr=paddle.ParamAttr(
                        initializer=paddle.nn.initializer.Normal(0., 0.02)),
                    bias_attr=paddle.ParamAttr(
                        initializer=paddle.nn.initializer.Constant(0.0),
                        regularizer=paddle.regularizer.L2Decay(0.))))
            self.block_outputs.append(block_out)

            if i < 2:
                # Integer division keeps the channel count an int.
                route = self.add_sublayer(
                    "route2_%d" % (i),
                    ConvBNLayer(ch_in=level_channels,
                                ch_out=256 // (2 ** i),
                                kernel_size=1,
                                stride=1,
                                padding=0))
                self.route_blocks_2.append(route)

        # Must be an instance: forward() calls it on a tensor.
        self.upsample = Upsample()

    def forward(self, inputs):
        """Run the backbone and heads; return one output tensor per level."""
        outputs = []
        blocks = self.block(inputs)
        for i, block in enumerate(blocks):
            if i > 0:
                # Merge the upsampled route of the previous level with this
                # level's backbone feature map along the channel axis.
                block = paddle.concat([route, block], axis=1)
            route, tip = self.yolo_blocks[i](block)
            block_out = self.block_outputs[i](tip)
            outputs.append(block_out)
            if i < 2:
                # Prepare the route for the next level.
                route = self.route_blocks_2[i](route)
                route = self.upsample(route)
        return outputs

    def get_loss(self, outputs, gtbox, gtlabel, gtscore=None,
                 anchors=[10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326],
                 anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]],
                 ignore_thresh=0.7,
                 use_label_smooth=False):
        """Sum ``paddle.vision.ops.yolo_loss`` over all output levels.

        Args:
            outputs: Per-level head outputs, e.g. the list from ``forward``.
            gtbox: Ground-truth boxes, forwarded as ``gt_box``.
            gtlabel: Ground-truth class labels, forwarded as ``gt_label``.
            gtscore: Optional ground-truth scores, forwarded as ``gt_score``.
            anchors: Flat list of anchor sizes shared by all levels. The
                default list is only read, never modified.
            anchor_masks: For level ``i``, the anchor indices it uses.
            ignore_thresh: Forwarded unchanged to ``yolo_loss``.
            use_label_smooth: Forwarded unchanged to ``yolo_loss``.

        Returns:
            The sum of the per-level losses. The individual losses are also
            kept in ``self.losses``.
        """
        self.losses = []
        # The first level uses ratio 32; it is halved for each later level.
        downsample = 32
        for i, out in enumerate(outputs):
            anchor_masks_i = anchor_masks[i]
            loss = paddle.vision.ops.yolo_loss(
                x=out,
                gt_box=gtbox,
                gt_label=gtlabel,
                gt_score=gtscore,
                anchors=anchors,
                anchor_mask=anchor_masks_i,
                class_num=self.num_classes,
                ignore_thresh=ignore_thresh,
                downsample_ratio=downsample,
                use_label_smooth=use_label_smooth)
            self.losses.append(loss)
            downsample = downsample // 2
        return sum(self.losses)