yoloV3训练与推理
真实框与绘制的框,都是由基础框(也就是上面定义的anchors)加上或减去一定的偏差得到的。
我们的目标样本是:根据真实标记框标记出的0/1标签、物体类别和框的位置。
我们的前向传播样本是:仅根据一张图片得到的、形状与目标样本相同的输出。
要做的是:使前向传播得到的样本拟合目标样本。
class YOLOV3(paddle.nn.Layer):
    """YOLOv3 detection network built on a DarkNet53 backbone.

    The backbone feature maps are processed by three detection levels. Each
    level runs a ``YoloDetectionBlock`` followed by a 1x1 convolution that
    emits ``3 * (num_classes + 5)`` channels. Between levels the "route"
    feature is reduced by a 1x1 ``ConvBNLayer``, upsampled, and concatenated
    with the next backbone feature map.
    """

    # Default anchors, parsed pair by pair (two values per anchor), and the
    # anchor indices assigned to each of the three output levels.
    _DEFAULT_ANCHORS = (10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
                        59, 119, 116, 90, 156, 198, 373, 326)
    _DEFAULT_ANCHOR_MASKS = ((6, 7, 8), (3, 4, 5), (0, 1, 2))

    def __init__(self, num_classes=7):
        """Create the backbone and the three detection levels.

        Args:
            num_classes: Number of object categories to predict.
        """
        super().__init__()
        self.num_classes = num_classes
        self.block = DarkNet53_conv_body()
        self.block_outputs = []
        self.yolo_blocks = []
        self.route_blocks_2 = []
        # Channel count of every level's prediction map: 3 anchors per level,
        # each with (num_classes + 5) values.
        num_filters = 3 * (self.num_classes + 5)
        for i in range(3):
            ch = 512 // (2 ** i)
            # Level 0 consumes the backbone feature map alone; later levels
            # also receive the upsampled route from the previous level.
            yolo_block = self.add_sublayer(
                "yolo_detection_block_%d" % i,
                YoloDetectionBlock(
                    ch_in=ch * 2 if i == 0 else ch * 2 + ch,
                    ch_out=ch,
                ),
            )
            self.yolo_blocks.append(yolo_block)

            # The output conv needs its own sublayer name: reusing the
            # detection block's name would replace that block's registration,
            # dropping its parameters from the model.
            block_out = self.add_sublayer(
                "block_out_%d" % i,
                paddle.nn.Conv2D(
                    in_channels=ch * 2,
                    out_channels=num_filters,
                    kernel_size=1,
                    stride=1,
                    weight_attr=paddle.ParamAttr(
                        initializer=paddle.nn.initializer.Normal(0., 0.02)),
                    bias_attr=paddle.ParamAttr(
                        initializer=paddle.nn.initializer.Constant(0.0),
                        regularizer=paddle.regularizer.L2Decay(0.)),
                ),
            )
            self.block_outputs.append(block_out)

            if i < 2:
                # Integer division keeps the channel count an int.
                route = self.add_sublayer(
                    "route2_%d" % i,
                    ConvBNLayer(ch_in=ch,
                                ch_out=256 // (2 ** i),
                                kernel_size=1,
                                stride=1,
                                padding=0),
                )
                self.route_blocks_2.append(route)
        # NOTE(review): assumes Upsample is a layer class that can be
        # constructed without arguments -- confirm against its definition.
        self.upsample = Upsample()

    def forward(self, inputs):
        """Run the network and return one prediction map per level.

        Args:
            inputs: Image batch passed straight to the backbone.

        Returns:
            list: Output of each level's 1x1 convolution, in the order the
            backbone yields its feature maps.
        """
        outputs = []
        blocks = self.block(inputs)
        route = None
        for i, block in enumerate(blocks):
            if i > 0:
                # Fuse the upsampled route of the previous level with this
                # level's backbone feature map along the channel axis.
                block = paddle.concat([route, block], axis=1)
            route, tip = self.yolo_blocks[i](block)
            outputs.append(self.block_outputs[i](tip))
            if i < 2:
                route = self.upsample(self.route_blocks_2[i](route))
        return outputs

    def get_loss(self, outputs, gtbox, gtlabel, gtscore=None,
                 anchors=None,
                 anchor_masks=None,
                 ignore_thresh=0.7,
                 use_label_smooth=False):
        """Compute the YOLOv3 loss summed over all output levels.

        Args:
            outputs: Per-level prediction maps as returned by ``forward``.
            gtbox: Ground-truth boxes, forwarded to ``yolo_loss``.
            gtlabel: Ground-truth class labels, forwarded to ``yolo_loss``.
            gtscore: Optional ground-truth scores, forwarded to ``yolo_loss``.
            anchors: Flat anchor list; defaults to the class-level anchors.
            anchor_masks: Anchor indices per level; defaults to the
                class-level masks.
            ignore_thresh: Forwarded to ``yolo_loss``.
            use_label_smooth: Forwarded to ``yolo_loss``.

        Returns:
            The sum of the per-level ``yolo_loss`` results. The individual
            results are also kept in ``self.losses``.
        """
        if anchors is None:
            anchors = list(self._DEFAULT_ANCHORS)
        if anchor_masks is None:
            anchor_masks = [list(mask) for mask in self._DEFAULT_ANCHOR_MASKS]

        self.losses = []
        # The first level is treated as stride 32; each following level
        # halves the stride.
        downsample = 32
        for i, out in enumerate(outputs):
            # NOTE(review): the yolo_loss result is accumulated without a
            # batch reduction -- confirm that is what the caller's backward()
            # is meant to see.
            loss = paddle.vision.ops.yolo_loss(
                x=out,
                gt_box=gtbox,
                gt_label=gtlabel,
                gt_score=gtscore,
                anchors=anchors,
                anchor_mask=anchor_masks[i],
                class_num=self.num_classes,
                ignore_thresh=ignore_thresh,
                downsample_ratio=downsample,
                use_label_smooth=use_label_smooth,
            )
            self.losses.append(loss)
            downsample = downsample // 2
        return sum(self.losses)

    def get_pred(self,
                 outputs,
                 im_shape=None,
                 anchors=None,
                 anchor_masks=None,
                 valid_thresh=0.01):
        """Decode the per-level prediction maps into boxes and scores.

        Args:
            outputs: Per-level prediction maps as returned by ``forward``.
            im_shape: Image sizes, forwarded to ``yolo_box`` as ``img_size``.
            anchors: Flat anchor list; defaults to the class-level anchors.
            anchor_masks: Anchor indices per level; defaults to the
                class-level masks.
            valid_thresh: Confidence threshold forwarded to ``yolo_box``.

        Returns:
            tuple: ``(yolo_boxes, yolo_scores)``. Boxes from all levels are
            concatenated along axis 1; scores are transposed with
            ``perm=[0, 2, 1]`` and concatenated along axis 2.
        """
        if anchors is None:
            anchors = list(self._DEFAULT_ANCHORS)
        if anchor_masks is None:
            anchor_masks = [list(mask) for mask in self._DEFAULT_ANCHOR_MASKS]

        downsample = 32
        total_boxes = []
        total_scores = []
        for i, out in enumerate(outputs):
            # Collect the two values of every anchor assigned to this level.
            anchors_this_level = []
            for m in anchor_masks[i]:
                anchors_this_level.extend((anchors[2 * m], anchors[2 * m + 1]))

            boxes, scores = paddle.vision.ops.yolo_box(
                x=out,
                img_size=im_shape,
                anchors=anchors_this_level,
                class_num=self.num_classes,
                conf_thresh=valid_thresh,
                downsample_ratio=downsample,
                name="yolo_box" + str(i),
            )
            total_boxes.append(boxes)
            total_scores.append(paddle.transpose(scores, perm=[0, 2, 1]))
            downsample = downsample // 2

        yolo_boxes = paddle.concat(total_boxes, axis=1)
        yolo_scores = paddle.concat(total_scores, axis=2)
        return yolo_boxes, yolo_scores
模型训练
# Flat anchor list handed to the model's get_loss / get_pred as `anchors`.
ANCHORS = [
    10, 13, 16, 30, 33, 23,
    30, 61, 62, 45, 59, 119,
    116, 90, 156, 198, 373, 326,
]
# Anchor indices for each output level, handed over as `anchor_masks`.
ANCHOR_MASKS = [
    [6, 7, 8],
    [3, 4, 5],
    [0, 1, 2],
]
# Passed to get_loss as `ignore_thresh`.
IGNORE_THRESH = 0.7
# Number of object categories the model is built for.
NUM_CLASSES = 7
def get_lr(base_lr=0.0001, lr_decay=0.1):
    """Build the piecewise-constant learning-rate schedule for training.

    The schedule has three stages separated by the boundaries 10000 and
    20000; each stage's rate is the previous one multiplied by ``lr_decay``.

    Args:
        base_lr: Learning rate of the first stage.
        lr_decay: Factor applied when moving to the next stage.

    Returns:
        A ``paddle.optimizer.lr.PiecewiseDecay`` scheduler.
    """
    boundaries = [10000, 20000]
    first_rate = base_lr
    second_rate = first_rate * lr_decay
    third_rate = second_rate * lr_decay
    return paddle.optimizer.lr.PiecewiseDecay(
        boundaries=boundaries,
        values=[first_rate, second_rate, third_rate],
    )
def _prepare_batch(data):
    """Unpack one loader batch and convert its parts to tensors.

    Args:
        data: A 4-item batch ``(img, gt_boxes, gt_labels, img_scale)``.

    Returns:
        tuple: ``(img, gt_boxes, gt_labels, gt_scores, img_scale)`` where
        ``gt_scores`` is an all-ones float32 tensor shaped like ``gt_labels``
        and ``img_scale`` is passed through unchanged.
    """
    img, gt_boxes, gt_labels, img_scale = data
    gt_scores = paddle.to_tensor(np.ones(gt_labels.shape).astype('float32'))
    img = paddle.to_tensor(img)
    gt_boxes = paddle.to_tensor(gt_boxes)
    gt_labels = paddle.to_tensor(gt_labels)
    return img, gt_boxes, gt_labels, gt_scores, img_scale


if __name__ == '__main__':
    TRAINDIR = '/home/aistudio/work/insects/train'
    TESTDIR = '/home/aistudio/work/insects/test'
    VALIDDIR = '/home/aistudio/work/insects/val'
    paddle.set_device("gpu:0")

    # NOTE(review): the datasets are placeholder zeros here; DataLoader needs
    # real dataset objects (presumably built from the directories above) --
    # TODO wire them in before running.
    train_dataset = 0
    valid_dataset = 0
    test_dataset = 0
    train_loader = paddle.io.DataLoader(
        train_dataset, batch_size=10, shuffle=True, num_workers=0,
        drop_last=True, use_shared_memory=False)
    valid_loader = paddle.io.DataLoader(
        valid_dataset, batch_size=10, shuffle=False, num_workers=0,
        drop_last=False, use_shared_memory=False)

    model = YOLOV3(num_classes=NUM_CLASSES)
    learning_rate = get_lr()
    opt = paddle.optimizer.Momentum(
        learning_rate=learning_rate,
        momentum=0.9,
        weight_decay=paddle.regularizer.L2Decay(0.0005),
        parameters=model.parameters(),
    )

    MAX_EPOCH = 200
    for epoch in range(MAX_EPOCH):
        for i, data in enumerate(train_loader):
            img, gt_boxes, gt_labels, gt_scores, img_scale = _prepare_batch(data)
            outputs = model(img)
            loss = model.get_loss(outputs,
                                  gt_boxes,
                                  gt_labels,
                                  gtscore=gt_scores,
                                  anchors=ANCHORS,
                                  anchor_masks=ANCHOR_MASKS,
                                  ignore_thresh=IGNORE_THRESH,
                                  use_label_smooth=False)
            loss.backward()
            opt.step()
            # clear_grad must be *called*; a bare attribute reference leaves
            # the gradients accumulating across iterations.
            opt.clear_grad()
            # The scheduler only advances when stepped explicitly; without
            # this the piecewise boundaries are never reached.
            learning_rate.step()

        # Evaluate on the validation set after every epoch. No gradients are
        # needed here, so skip building the autograd graph.
        model.eval()
        with paddle.no_grad():
            for i, data in enumerate(valid_loader):
                img, gt_boxes, gt_labels, gt_scores, img_scale = _prepare_batch(data)
                outputs = model(img)
                loss = model.get_loss(outputs, gt_boxes, gt_labels,
                                      gtscore=gt_scores,
                                      anchors=ANCHORS,
                                      anchor_masks=ANCHOR_MASKS,
                                      ignore_thresh=IGNORE_THRESH,
                                      use_label_smooth=False)
        model.train()

    # Model inference on the last batch seen above; non-maximum suppression
    # is applied afterwards to obtain the most accurate boxes.
    model.eval()
    with paddle.no_grad():
        outputs = model(img)
        bboxes, scores = model.get_pred(outputs,
                                        im_shape=img_scale,
                                        anchors=ANCHORS,
                                        anchor_masks=ANCHOR_MASKS)