0、环境说明
# Create and populate a dedicated conda environment for PolarMask.
# Run from the directory that contains the PolarMask checkout.
cd PolarMask/
conda create --name polarmask python=3.7
conda activate polarmask
pip install torch==1.2.0
pip install torchvision==0.4.0
pip install Cython
# Build/install the repository in development mode.
python setup.py develop
# Pin mmcv: an unpinned install can fail at import time with
# "ModuleNotFoundError: No module named 'mmcv.cnn.weight_init'".
pip install mmcv==0.5.9
# Pin prompt-toolkit: avoids "ImportError: cannot import name 'AsyncGenerator'".
pip install prompt-toolkit==2.0.10
pip install Polygon3
机器环境:NVIDIA 驱动 418 + CUDA 10.0 + cuDNN 7.6.3
1、config设置
配置文件的设置:
# model settings
# Top-level detector config: a 'PolarMask' detector assembled from the
# backbone, neck and bbox_head sub-configs below.
model = dict(
type='PolarMask',
# Identifier of the pretrained backbone weights to start from.
pretrained='open-mmlab://resnet50_caffe',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
# Presumably the indices of the backbone stages whose feature maps are
# returned (all four here) — confirm against the ResNet implementation.
out_indices=(0, 1, 2, 3),
frozen_stages=1,
# BN layers are configured with requires_grad=False.
norm_cfg=dict(type='BN', requires_grad=False),
style='caffe'),
neck=dict(
type='FPN',
# Four entries, matching the four out_indices of the backbone above.
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs=True,
extra_convs_on_inputs=False, # use P5
# Five output levels; the head below lists five strides accordingly.
num_outs=5,
relu_before_extra_convs=True),
bbox_head=dict(
type='PolarMask_Head',
# Presumably 80 COCO categories + 1 background — TODO confirm against the head.
num_classes=81,
# Equals the neck's out_channels (256).
in_channels=256,
stacked_convs=4,
feat_channels=256,
# One stride per neck output level (5 entries for num_outs=5).
strides=[8, 16, 32, 64, 128],
# Classification loss: focal loss with sigmoid activation.
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='IoULoss', loss_weight=1.0),
# Centerness loss: cross-entropy configured with use_sigmoid=True.
loss_centerness=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)))
# training and testing settings
# Training-time settings.
# NOTE(review): how (or whether) PolarMask_Head consumes the assigner settings
# is not visible here — confirm before tuning these values.
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.4,
min_pos_iou=0,
ignore_iof_thr=-1),
allowed_border=-1,
pos_weight=-1,
debug=False)
# Inference-time post-processing settings. Presumably: keep up to nms_pre
# candidates before NMS, discard scores below score_thr, run NMS with the given
# IoU threshold and keep at most max_per_img results per image — confirm
# against the head's result-decoding code.
test_cfg = dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_thr=0.5),
max_per_img=100)
# dataset settings
dataset_type = 'Coco_Seg_Dataset'
data_root = '/media/dell/Elements/dataset/MSCOCO/2017/' # dataset root path (machine-specific; change to your own)
# Per-channel mean subtraction with unit std and no BGR->RGB conversion
# (to_rgb=False).
img_norm_cfg = dict(
mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
# Train/val/test dataset definitions. All three share dataset_type, data_root,
# img_scale and img_norm_cfg.
data = dict(
imgs_per_gpu=2,
workers_per_gpu=5,
# Training split: train2017 images and annotations, with masks and random
# horizontal flipping enabled (flip_ratio=0.5).
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
img_scale=(1280, 768),
img_norm_cfg=img_norm_cfg,
# size_divisor=0,
flip_ratio=0.5,
with_mask=True,
with_crowd=False,
with_label=True,
resize_keep_ratio=False),
# Validation split: val2017 images and annotations, no flipping, no masks.
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1280, 768),
img_norm_cfg=img_norm_cfg,
# size_divisor=0,
flip_ratio=0,
with_mask=False,
with_crowd=False,
with_label=True,
resize_keep_ratio=False),
# Test split: uses the same val2017 images and annotations as `val`, with
# test_mode=True, no labels, and size_divisor=32.
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1280, 768),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=False,
with_label=False,
resize_keep_ratio=False,
test_mode=True))
# optimizer
# Scale factor applied to the base learning rate; it also divides the warmup
# ratio below, so changing it adjusts both together.
lr_ratio = 1
optimizer = dict(
type='SGD',
# Evaluates to 0.00125 when lr_ratio == 1.
lr=0.01 / 8 * lr_ratio,
momentum=0.9,
weight_decay=0.0001,
# Bias parameters get a 2x learning-rate multiplier and a 0x weight-decay
# multiplier.
paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.))
# Gradient clipping by L2 norm with max_norm=35.
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
# Step schedule with a linear warmup over the first 500 iterations.
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
# Evaluates to 1/3 when lr_ratio == 1.
warmup_ratio=1.0 / 3 / lr_ratio,
# Presumably the epochs at which the learning rate is decayed — confirm
# against the runner's LR hook.
step=[8, 11])
# Checkpoint saving interval (1).
checkpoint_config = dict(interval=1)
# yapf:disable
# Logging: text logger with interval=10; the Tensorboard hook is left disabled.
log_config = dict(
interval=10,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
device_ids = range(2) # determined by the number of GPUs used
dist_params = dict(backend='nccl')
log_level = 'INFO'
# Default output directory; the training commands shown after this config pass
# --work_dir explicitly.
work_dir = './work_dirs/trash'
# No checkpoint is loaded or resumed by default; the dual-GPU training command
# passes --resume_from on the command line instead.
load_from = None
resume_from = None
workflow = [('train', 1)]
2、训练语句
双GPU
python ./tools/train.py configs/polarmask/4gpu/polar_768_1x_r50.py --gpus 2 --launcher pytorch --work_dir ./tools/work_dirs/polar_768_1x_r50_4gpu --resume_from ./tools/work_dirs/polar_768_1x_r50_4gpu/epoch_2.pth
单GPU
python ./tools/train.py configs/polarmask/4gpu/polar_768_1x_r50.py --gpus 1 --launcher pytorch --work_dir ./tools/work_dirs/polar_768_1x_r50_4gpu
看出差别了吗?两条命令的差别在于 --gpus 的取值(2 与 1);另外,双GPU的命令还通过 --resume_from 从 epoch_2.pth 继续训练。
3、测试语句
python ./tools/test.py configs/polarmask/4gpu/polar_768_1x_r50.py ./tools/work_dirs/polar_768_1x_r50_4gpu/latest.pth --launcher pytorch --out work_dirs/trash/res.pkl --eval segm
4、出现问题
1、ModuleNotFoundError: No module named 'mmcv.cnn.weight_init'
解决:pip install mmcv==0.5.9
2、ImportError: cannot import name 'AsyncGenerator'
解决:pip install prompt-toolkit==2.0.10
3、ImportError: cannot import name 'get_dist_info'
解决:
将 /PolarMask/mmdet/datasets/loader/sampler.py 第六行的 from mmcv.runner.utils import get_dist_info
改成 from mmcv.runner import get_dist_info,即:
from mmcv.runner import get_dist_info
4、anaconda3/lib/python3.6/site-packages/torch/lib/libtorch_python.so: undefined symbol: PySlice_Unpack
解决:将 Python 3.6.0 升级到更高版本(3.6.2 及以上)
5、KeyError: 'RANK'
解决:在 tools/train.py 中初始化分布式环境之前,增加一行 args.launcher = 'none'。
注意:这会覆盖命令行传入的 --launcher 参数,强制使用非分布式训练。修改后的代码如下:
# init distributed env first, since logger depends on the dist info.
# Workaround for "KeyError: 'RANK'": force the non-distributed path. This
# assignment overrides whatever --launcher value was given on the command
# line, so the `else` branch below is never taken while it is present.
args.launcher = 'none'
if args.launcher == 'none':
    distributed = False
else:
    distributed = True
    init_dist(args.launcher, **cfg.dist_params)
5、可视化
找到 demo 文件夹下的 visualize.ipynb,或者利用下面代码:
# Inference/visualization script: load a trained PolarMask checkpoint and
# display the predictions for a single image.
from mmdet.apis import init_detector, inference_detector, show_result_pyplot
# NOTE: mmcv is not used by the active statements below, and os is only used by
# the commented-out batch loop.
import mmcv
import os
# Paths are relative to the directory the script is run from.
config_file = '../configs/polarmask/4gpu/polar_768_1x_r50.py'
# Path to the trained checkpoint.
# NOTE(review): 'tools/tools' looks duplicated compared with the --work_dir
# used in the training command (./tools/work_dirs/...) — confirm the path.
checkpoint_file = '../tools/tools/work_dirs/polar_768_1x_r50_4gpu/epoch_7.pth'
# Build the model from the config and checkpoint on the first GPU.
model = init_detector(config_file, checkpoint_file, device='cuda:0')
# Optional: visualize results for every image of the COCO test2017 set
# (disabled; uncomment to use).
# img_path = '/media/dell/Elements/dataset/MSCOCO/2017/test2017'
# img_names = os.listdir(img_path)
# for name in img_names:
#     img = os.path.join(img_path, name)
#     result = inference_detector(model, img)
#     show_result_pyplot(img, result, model.CLASSES, score_thr=-1)
# Run on any single image.
img = '1.jpg'
result = inference_detector(model, img)
# Display the result with score_thr=0.3.
show_result_pyplot(img, result, model.CLASSES, score_thr=0.3)
![](https://i-blog.csdnimg.cn/blog_migrate/54d04781bfd85ac8d6386a479b2f7b69.png)
6、代码结构
网络结构:backbone + neck + bbox_head,代码见 mmdet/models/detectors/polarmask.py,列出主要代码:

```python
class PolarMask(SingleStageDetector):

    def __init__(self,
                 backbone,
                 neck,
                 bbox_head,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(PolarMask, self).__init__(backbone, neck, bbox_head, train_cfg,
                                        test_cfg, pretrained)
```
必选参数为 backbone、neck、bbox_head,这里的参数均可以在 configs/polarmask/4gpu 中的配置文件中找到,通过 tools/train.py 将参数传入。
bbox_head文件:实现掩膜生成的主要文件,位置在 mmdet/models/anchor_heads/polarmask_head.py。查看 __init__ 代码,相关参数见注释:

```python
class PolarMask_Head(nn.Module):

    def __init__(self,
                 num_classes,  # 数据集类别数
                 in_channels,  # 经过neck网络输出的特征通道数
                 feat_channels=256,  # head中间特征的通道数
                 stacked_convs=4,  # head中堆叠的卷积层个数
                 strides=(4, 8, 16, 32, 64),
                 regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 512),
                                 (512, INF)),
                 use_dcn=False,
                 mask_nms=False,
                 # 以下定义loss,可以自动完成backward
                 loss_cls=dict(
                     type='FocalLoss',
                     use_sigmoid=True,
                     gamma=2.0,
                     alpha=0.25,
                     loss_weight=1.0),
                 loss_bbox=dict(type='IoULoss', loss_weight=1.0),
                 loss_mask=dict(type='MaskIOULoss'),
                 loss_centerness=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
                 conv_cfg=None,
                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)):
        super(PolarMask_Head, self).__init__()
```
PolarMask_head的函数主要有以下:
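除了上面的函数外,还有一个 distance2mask,就是根据论文中下式,把中心点 $(x_c, y_c)$ 以及各个角度 $\theta_i$ 上的距离 $d_i$ 转成 mask 的轮廓点:

$$x_i = \cos\theta_i \cdot d_i + x_c, \qquad y_i = \sin\theta_i \cdot d_i + y_c$$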
数据集:代码位于 mmdet/datasets/coco_seg.py,主要的函数为 prepare_train_img,整体结构为:
Loss 以及 optimizer:在第 1 步的配置文件中,通过增加相应的 dict 完成。
训练网络:利用 tools/train.py 将参数传入,逐步调用即可。
验证网络:利用 tools/test.py 将参数传入,逐步调用即可。