用GluonCV训练自己的目标检测数据集

GluonCV

新手一枚,不是CV领域的。有个项目需要在自己标注的数据集上跑一下主流的detector,苦于网上的代码参差不齐,与State-of-the-art的具体性能差距也未知,前期花了一大把时间也没什么进展。后来刷知乎看到了李沐的GluonCV:底层开源,主流的detector基本能直接调用,也支持训练自己的数据集。这里记录一下自己从零开始训练的整个过程,主要参考来源为官网Tutorials。

安装

安装过程比较简单,具体参考官网 Installation 的指令,然后在conda prompt命令行内进行下载即可(我是用的Anaconda,下载安装自行百度)

准备自己的数据集

参考 Prepare custom dataset
最快的方式是首先建立一个PASCAL VOC格式数据集(如使用labelImg标注可直接选择输出格式),然后可直接转换成GluonCV格式

训练教程

这里我首先跑了一遍教程Train YOLOv3 on PASCAL VOC

  • 下载VOC数据集
    在这里插入图片描述
    这里我是直接点链接下载,文件夹放置和命名参考了pascal_voc.py的源代码:
    – 两个07VOC.tar解压在VOC2007文件夹内
    – 12VOC.tar解压在VOC2012文件夹内
    benchmark.tgz解压在VOCaug文件夹内
    – 这里源代码还往 VOCaug/dataset/trainval.txt 里面写了个文件存放路径,我这里暂时没有加
    – 把VOCDetection的第一个参数root改成自定义的文件位置root='E:/YOLO/GluonCV_code/mxnet/datasets/voc'。注意:路径分隔符要用正斜杠 /
    如果不改,root参数默认为 C:\Users\Username\mxnet\datasets\voc
    在这里插入图片描述
  • 数据增强presets.yolo.YOLO3DefaultTrainTransform
    对训练集数据: 随机颜色失真,随机扩张/裁剪,随机翻转,随机Resize,颜色归一化
    对验证集数据: 随机Resize,颜色归一化
    对标签:与数据操作同步,保证一致性
  • 加载网络net = model_zoo.get_model('yolo3_darknet53_voc', pretrained_base=False)
  • 加载Dataloader,对数据分批的几个操作Pad,Stack可参考API
train_loader = DataLoader(train_dataset.transform(train_transform), batch_size, shuffle=True,
                          batchify_fn=batchify_fn, last_batch='rollover', num_workers=num_workers)
  • 输出loss + 标准的autograd.record梯度下降流程
  • 教程最后有完整的training script下载,使用的思路和函数基本与这个相同,可自行下载阅读

最终使用的目标检测训练代码:(从官方代码进行修改得到)
模型:YOLOv3 + darknet backbone
batchsize = 4
数据集文件夹组织结构:
LED_dataset
– VOC2020
    – JPEGImages (放.jpg数据,从1.jpg开始数字命名)
    – ImageSets
        – Main
          – test.txt (测试集.jpg的索引)
          – train.txt (训练集.jpg的索引)
    – Annotations (放标签.xml文件)

# from __future__ import absolute_import # 从系统自带库中import,避免当前文件夹重名
from gluoncv.data import VOCDetection
from matplotlib import pyplot as plt
from gluoncv.utils import viz
from gluoncv.data.transforms import presets
from gluoncv import utils
from mxnet import nd
from gluoncv.data.batchify import Tuple, Stack, Pad
from mxnet.gluon.data import DataLoader
from gluoncv import model_zoo
from gluoncv import data
import mxnet as mx
import gluoncv as gcv
from mxnet import autograd, gluon
import time

def save_params(net, best_map, current_map, epoch, save_interval, prefix):
    """Checkpoint network weights and track the best validation mAP.

    Parameters
    ----------
    net : object with a ``save_parameters(filename)`` method (the detector).
    best_map : list of one float, mutated in place so the caller sees the
        running best mAP across epochs.
    current_map : float-convertible, this epoch's validation mAP.
    epoch : int, current epoch index (used in filenames and the log).
    save_interval : int, save a periodic checkpoint every this many epochs;
        0/None disables periodic saves.
    prefix : str, path prefix for all emitted files.
    """
    current_map = float(current_map)
    if current_map > best_map[0]:
        best_map[0] = current_map
        # Fix: the original passed unused extra args (epoch, current_map) to a
        # format string with a single placeholder; they were silently ignored.
        net.save_parameters('{:s}_best.params'.format(prefix))
        # Append so the log keeps the full history of best-mAP improvements.
        with open(prefix + '_best_map.log', 'a') as f:
            f.write('{:04d}:\t{:.4f}\n'.format(epoch, current_map))
    if save_interval and epoch % save_interval == 0:
        net.save_parameters('{:s}_{:04d}_{:.4f}.params'.format(prefix, epoch, current_map))

def validate(net, val_data, ctx, eval_metric):
    """Run the detector over the validation set and return metric results.

    Returns whatever ``eval_metric.get()`` yields, i.e. a pair of
    (metric names, values) such as per-class APs plus mAP.
    """
    eval_metric.reset()
    # NMS config: discard boxes overlapping a higher-scored one at IoU > 0.45,
    # keep at most 400 detections per image after suppression.
    net.set_nms(nms_thresh=0.45, nms_topk=400)
    mx.nd.waitall()
    net.hybridize()
    for batch in val_data:
        images = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        labels = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        pred_boxes, pred_ids, pred_scores = [], [], []
        true_boxes, true_ids, true_difficult = [], [], []
        for img, lab in zip(images, labels):
            # Forward pass yields class ids, confidences and box coordinates.
            ids, scores, boxes = net(img)
            pred_ids.append(ids)
            pred_scores.append(scores)
            # Clamp predicted boxes to the input image size.
            pred_boxes.append(boxes.clip(0, batch[0].shape[2]))
            # Ground-truth layout per row: [x1, y1, x2, y2, class, (difficult)].
            true_ids.append(lab.slice_axis(axis=-1, begin=4, end=5))
            true_boxes.append(lab.slice_axis(axis=-1, begin=0, end=4))
            if lab.shape[-1] > 5:
                true_difficult.append(lab.slice_axis(axis=-1, begin=5, end=6))
            else:
                true_difficult.append(None)

        eval_metric.update(pred_boxes, pred_ids, pred_scores, true_boxes, true_ids, true_difficult)
    return eval_metric.get()

class VOCLike(VOCDetection):
    """PASCAL-VOC-layout dataset with a custom two-class label set."""

    # Overrides the 20 VOC categories with this project's classes.
    CLASSES = ['global', 'dc']

    def __init__(self, root, splits, transform=None, index_map=None, preload_label=True):
        super(VOCLike, self).__init__(root, splits, transform=transform,
                                      index_map=index_map,
                                      preload_label=preload_label)

from gluoncv.utils.metrics.voc_detection import VOC07MApMetric

# ---- Custom dataset ----
# (was a bare no-op string statement 'Customed dataset'; a comment is idiomatic)
# VOC-layout data rooted at LED_dataset/VOC2020 — see the folder tree above.
train_dataset = VOCLike(root='LED_dataset', splits=((2020, 'train'),))
val_dataset = VOCLike(root='LED_dataset', splits=((2020, 'test'),))
# VOC07 11-point interpolated AP, reported at IoU >= 0.5 (mAP).
val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
len_train_data = len(train_dataset)
len_val_data = len(val_dataset)

# ---- Build the network ----
# Start from VOC-pretrained YOLOv3 weights, then remap the head to our classes.
resume = ''  # set to a checkpoint path (e.g. './_best.params') to resume
classes = ['global', 'dc']
net = gcv.model_zoo.get_model('yolo3_darknet53_voc', pretrained=True)
# net = gcv.model_zoo.get_model('yolo3_mobilenet1.0_voc', pretrained=True)
net.reset_class(classes)
if resume.strip():  # strip() guards against a whitespace-only path
    net.load_parameters(resume.strip())

# ---- Data loaders ----
utils.random.seed(123)  # reproducible shuffling / augmentation
batch_size = 4
num_workers = 0
from gluoncv.data.dataloader import RandomTransformDataLoader  # NOTE(review): unused here
width, height = 608, 608
# Train batchify: Stack the image and the 5 generated YOLO target tensors,
# pad the variable-length ground-truth boxes with -1.
batchify_fn = Tuple(
    *([Stack() for _ in range(6)] + [Pad(axis=0, pad_val=-1)]))
train_loader = gluon.data.DataLoader(
    train_dataset.transform(presets.yolo.YOLO3DefaultTrainTransform(width, height, net)),
    batch_size, shuffle=True, batchify_fn=batchify_fn,
    last_batch='rollover', num_workers=num_workers)
# Val batchify: stack images, pad variable-length label arrays with -1.
val_batchify_fn = Tuple(Stack(), Pad(pad_val=-1))
val_loader = gluon.data.DataLoader(
    val_dataset.transform(presets.yolo.YOLO3DefaultValTransform(width, height)),
    batch_size, shuffle=False, batchify_fn=val_batchify_fn,
    last_batch='keep', num_workers=num_workers)

# Probe for a usable GPU by allocating a tiny array; fall back to CPU.
try:
    mx.nd.zeros((1,), ctx=mx.gpu(0))
    ctx = [mx.gpu(0)]
except Exception:  # was a bare except, which would also trap KeyboardInterrupt
    ctx = [mx.cpu()]

## start training
net.collect_params().reset_ctx(ctx) # Re-assign all Parameters to other contexts
# net._target_generator._label_smooth = True
# SGD with a small learning rate — we fine-tune from VOC-pretrained weights.
trainer = gluon.Trainer(
    net.collect_params(), 'sgd',
    {'learning_rate': 0.0001, 'wd': 0.0005, 'momentum': 0.9})
## for yolo training
# targets
# NOTE(review): sigmoid_ce and l1_loss are defined but never used below —
# the YOLOv3 net appears to return its loss components directly; confirm
# before deleting.
sigmoid_ce = gluon.loss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=False)
l1_loss = gluon.loss.L1Loss()

# metrics: running means of the four YOLOv3 loss components, for logging only
obj_metrics = mx.metric.Loss('ObjLoss')
center_metrics = mx.metric.Loss('BoxCenterLoss')
scale_metrics = mx.metric.Loss('BoxScaleLoss')
cls_metrics = mx.metric.Loss('ClassLoss')

import logging
import os
# set up logger: INFO level to the console (basicConfig) and to a file handler
logging.basicConfig()
logger = logging.getLogger()
logger.setLevel(logging.INFO)
log_file_path = 'yolov3_train_822.log'
log_dir = os.path.dirname(log_file_path)
if log_dir and not os.path.exists(log_dir):
    os.makedirs(log_dir)
fh = logging.FileHandler(log_file_path)
logger.addHandler(fh)

# best_map is a one-element list so save_params can update it in place.
best_map = [0]
train_epochs = 200
log_interval = 50   # in batches
val_interval = 1
save_interval = 2
save_prefix = ''
# NOTE(review): start_epoch = 10 only shifts the epoch numbering; with
# resume == '' no training is actually skipped — confirm this is intended.
start_epoch = 10 # starting from 0
logger.info('Start training from [Epoch {}]'.format(start_epoch)) # logger.info handles level filtering and output

# ---- Main training loop ----
for epoch in range(start_epoch, train_epochs):
    tic = time.time()   # epoch timer
    btic = time.time()  # per-log-interval timer
    mx.nd.waitall()     # flush pending async ops before timing
    # Hybridize: build the static graph once and reuse it — faster than
    # imperative mode (see https://zhuanlan.zhihu.com/p/35202071).
    net.hybridize()
    for i, batch in enumerate(train_loader):
        # split_and_load shards a batch across the devices in ctx; with a
        # single device it is just a one-element list wrapping batch[k].
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        # batch[1:6]: objectness, center_targets, scale_targets, weights, class_targets
        fixed_targets = [gluon.utils.split_and_load(batch[it], ctx_list=ctx, batch_axis=0)
                         for it in range(1, 6)]
        gt_boxes = gluon.utils.split_and_load(batch[6], ctx_list=ctx, batch_axis=0)
        sum_losses = []
        obj_losses = []
        center_losses = []
        scale_losses = []
        cls_losses = []
        with autograd.record():
            # In training mode the YOLOv3 net returns its loss components.
            for ix, x in enumerate(data):
                obj_loss, center_loss, scale_loss, cls_loss = net(
                    x, gt_boxes[ix], *[ft[ix] for ft in fixed_targets])
                sum_losses.append(obj_loss + center_loss + scale_loss + cls_loss)
                obj_losses.append(obj_loss)
                center_losses.append(center_loss)
                scale_losses.append(scale_loss)
                cls_losses.append(cls_loss)
            # BUG FIX: backward must run once per batch, after the losses of
            # every device slice are collected. The original called it inside
            # the per-device loop, re-running backward on already-propagated
            # losses whenever ctx held more than one device.
            autograd.backward(sum_losses)

        trainer.step(batch_size)    # standard parameter update after autograd
        obj_metrics.update(0, obj_losses)
        center_metrics.update(0, center_losses)
        scale_metrics.update(0, scale_losses)
        cls_metrics.update(0, cls_losses)
        if log_interval and not (i + 1) % log_interval:
            name1, loss1 = obj_metrics.get()
            name2, loss2 = center_metrics.get()
            name3, loss3 = scale_metrics.get()
            name4, loss4 = cls_metrics.get()
            logger.info('[Epoch {}][Batch {}], LR: {:.2E}, Speed: {:.3f} samples/sec, {}={:.3f}, {}={:.3f}, {}={:.3f}, {}={:.3f}'.format(
                epoch, i, trainer.learning_rate, batch_size/(time.time()-btic), name1, loss1, name2, loss2, name3, loss3, name4, loss4))
        btic = time.time()

    # End-of-epoch summary of the running loss metrics.
    name1, loss1 = obj_metrics.get()
    name2, loss2 = center_metrics.get()
    name3, loss3 = scale_metrics.get()
    name4, loss4 = cls_metrics.get()
    logger.info('[Epoch {}] Training cost: {:.3f}, {}={:.3f}, {}={:.3f}, {}={:.3f}, {}={:.3f}'.format(
        epoch, (time.time()-tic), name1, loss1, name2, loss2, name3, loss3, name4, loss4))
    if not (epoch + 1) % val_interval:
        # consider reducing the frequency of validation to save time
        map_name, mean_ap = validate(net, val_loader, ctx, val_metric)
        val_msg = '\n'.join(['{}={}'.format(k, v) for k, v in zip(map_name, mean_ap)])
        logger.info('[Epoch {}] Validation: \n{}'.format(epoch, val_msg))
        current_map = float(mean_ap[-1])
    else:
        current_map = 0.
    save_params(net, best_map, current_map, epoch, save_interval, save_prefix)
  • 0
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值