SOLO & SOLOv2训练数据集
接上一篇实例分割SOLO & SOLOv2环境配置(ubuntu20.04 + miniconda)
考虑到使用开源代码训练公开数据集缺少灵魂🤥,应该先使用开源代码训练自建数据集,然后就是使用改进的代码训练自建数据集🤤。 直接上流程😏
准备数据集
将数据集转成coco的数据格式,目录结构如下
不清楚怎么转coco数据格式的可以先移步搜索引擎,转好再回来
data
└── coco
├── annotations
│ ├── instances_train2017.json
│ └── instances_val2017.json
└── images
├── test2017
├── train2017
└── val2017
注册数据类别
在 mmdet/datasets/
目录下创建自己的数据类别文件,例如 XDataset.py
增加代码如下
from .coco import CocoDataset
from .registry import DATASETS


# Register a new COCO-format dataset class so it can be referenced by
# name ('XDataset') from a model config file.
@DATASETS.register_module
class XDataset(CocoDataset):
    # Replace these with the real category names of your dataset
    # (they must match the category names in the COCO annotation json).
    CLASSES = ['a', 'b', 'c']
在 mmdet/datasets/__init__.py
文件中添加刚创建的数据格式 XDataset
,修改后代码如下
from .builder import build_dataset
from .cityscapes import CityscapesDataset
from .coco import CocoDataset
from .custom import CustomDataset
from .dataset_wrappers import ConcatDataset, RepeatDataset
from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
from .registry import DATASETS
from .voc import VOCDataset
from .wider_face import WIDERFaceDataset
from .xml_style import XMLDataset
# Importing the custom dataset runs its @DATASETS.register_module
# decorator, making 'XDataset' resolvable from config files.
from .XDataset import XDataset

# Public API of mmdet.datasets.
__all__ = [
    'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset',
    'CityscapesDataset', 'GroupSampler', 'DistributedGroupSampler',
    'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'WIDERFaceDataset',
    'DATASETS', 'build_dataset', 'XDataset'
]
修改配置文件
找到需要使用的模型配置文件,例如在 configs/solov2/xsolov2_light_448_r34_fpn_8gpu_3x.py
文件中更改检测类别,训练尺寸和数据类别,分别对应 num_classes
, img_scale
和 dataset_type
,修改后代码如下
# model settings
# SOLOv2-light with a ResNet-34 backbone and an FPN neck.
model = dict(
    type='SOLOv2',
    pretrained='torchvision://resnet34',  # ImageNet weights from torchvision
    backbone=dict(
        type='ResNet',
        depth=34,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # C2, C3, C4, C5
        frozen_stages=1,  # freeze the stem and first stage during training
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[64, 128, 256, 512],  # ResNet-34 stage output channels
        out_channels=256,
        start_level=0,
        num_outs=5),
    bbox_head=dict(
        type='SOLOv2Head',
        # 3 object classes (a, b, c) + 1 background class
        num_classes=4,
        in_channels=256,
        stacked_convs=2,
        seg_feat_channels=256,
        strides=[8, 8, 16, 32, 32],
        # object scale range assigned to each FPN level
        scale_ranges=((1, 56), (28, 112), (56, 224), (112, 448), (224, 896)),
        sigma=0.2,
        # grid resolution per FPN level (finer grids for smaller objects)
        num_grids=[40, 36, 24, 16, 12],
        ins_out_channels=128,
        loss_ins=dict(
            type='DiceLoss',
            use_sigmoid=True,
            loss_weight=3.0),
        loss_cate=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0)),
    mask_feat_head=dict(
        type='MaskFeatHead',
        in_channels=256,
        out_channels=128,
        start_level=0,
        end_level=3,
        num_classes=128,  # here: number of mask-feature channels, not classes
        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)),
)
# training and testing settings
train_cfg = dict()  # no extra train-time hyper-parameters needed here
test_cfg = dict(
    nms_pre=500,        # keep at most 500 candidates before Matrix NMS
    score_thr=0.1,      # drop predictions below this category score
    mask_thr=0.5,       # binarization threshold for predicted masks
    update_thr=0.05,    # discard masks whose updated score falls below this
    kernel='gaussian',  # gaussian/linear
    sigma=2.0,
    max_per_img=100)    # maximum number of instances kept per image
# dataset settings
# dataset_type = 'CocoDataset'
dataset_type = 'XDataset'  # custom dataset registered in mmdet/datasets
data_root = 'data/coco/'
# ImageNet mean/std normalization.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize',
         # original SOLOv2-light 448 multi-scale values:
         # img_scale=[(768, 512), (768, 480), (768, 448),
         #            (768, 416), (768, 384), (768, 352)],
         img_scale=[(640, 480), (640, 420), (640, 400),
                    (640, 360), (640, 320), (640, 300)],
         multiscale_mode='value',  # pick one scale from the list per image
         keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),  # pad so H and W are multiples of 32
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        # original SOLOv2-light 448 test scale:
        # img_scale=(768, 448),
        img_scale=(640, 480),  # single-scale testing, no flip augmentation
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    imgs_per_gpu=2,     # batch size per GPU
    workers_per_gpu=2,  # dataloader worker processes per GPU
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'images/train2017/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'images/val2017/',
        pipeline=test_pipeline),
    # NOTE(review): test reuses the val annotations/images — no separate
    # annotated test split is configured here.
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'images/val2017/',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
# gradient clipping to stabilize early training
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.01,
    # NOTE(review): step=[27, 33] matches the standard 3x (36-epoch)
    # schedule; with total_epochs=300 the lr stays constant after epoch 33 —
    # confirm this is intended.
    step=[27, 33])
checkpoint_config = dict(interval=1)  # save a checkpoint every epoch
# yapf:disable
log_config = dict(
    interval=50,  # log every 50 iterations
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
# NOTE(review): 300 epochs is far beyond the 3x schedule (36 epochs) that
# the lr steps above imply — confirm the intended training length.
total_epochs = 300
device_ids = range(8)
dist_params = dict(backend='nccl')
log_level = 'INFO'
# Directory where training logs and checkpoints are stored.
work_dir = './work_dirs/solov2_light_release_r34_fpn_8gpu_3x'
load_from = None    # optional checkpoint to initialize weights from
resume_from = None  # optional checkpoint to resume training from
workflow = [('train', 1)]
训练数据集
打开终端,进入虚拟环境中运行代码
python tools/train.py configs/solov2/xsolov2_light_448_r34_fpn_8gpu_3x.py
测试权重
在 ./work_dirs/solov2_light_release_r34_fpn_8gpu_3x
文件夹下挑选希望使用的权重文件,更改 inference_demo.py
文件里 checkpoint_file
和对应的模型路径 config_file
以及测试图像读取路径 img
打开终端运行
python demo/inference_demo.py
注意!使用Pycharm等工具和终端直接运行检索的路径有区别,将出错路径改为绝对路径可以解决大部分路径问题