半监督学习实战记录
使用mmdetection和softTeacher
SSL(Semi-Supervised Learning)半监督学习,由于其可以合理利用大量无标注数据的属性,一直是CV研究的一个重要方向。
论文地址:https://arxiv.org/pdf/2106.09018.pdf
Github地址:https://github.com/microsoft/SoftTeacher
参考论文解析和源码解读:https://zhuanlan.zhihu.com/p/437754834
参考实战项目:https://github.com/Ruikang07/SoftTeacher_nodule_detection
准备数据
该算法是基于fasterRcnn来实现目标检测的,而SoftTeacher通过逐步迭代的学习方式,不断将当前阶段可信的伪标签加入训练。我这个项目是使用10%的标注数据来进行训练,其余进行伪标签训练,所以需要准备标注的数据和未标注的数据集。
配置文件
我直接参考了实战项目的配置文件,并且进行了修改,具体如下:
model = dict(
type='SoftTeacher',
model=dict(
type='FasterRCNN',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='SyncBN', requires_grad=False),
norm_eval=True,
style='caffe',
init_cfg=dict(
type='Pretrained',
checkpoint='open-mmlab://detectron2/resnet50_caffe')),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(
type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=2,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=2000,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.5),
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False)),
test_cfg=dict(
rpn=dict(
nms_pre=1000,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100))),
train_cfg=dict(
use_teacher_proposal=False,
pseudo_label_initial_score_thr=0.5,
rpn_pseudo_threshold=0.9,
cls_pseudo_threshold=0.9,
reg_pseudo_threshold=0.02,
jitter_times=10,
jitter_scale=0.06,
min_pseduo_box_size=0,
unsup_weight=2.0),
test_cfg=dict(inference_on='student'))
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[103.53, 116.28, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Sequential',
transforms=[
dict(
type='RandResize',
img_scale=[(1333, 400), (1333, 1200)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandFlip', flip_ratio=0.5),
dict(
type='OneOf',
transforms=[
dict(type='Identity'),
dict(type='AutoContrast'),
dict(type='RandEqualize'),
dict(type='RandSolarize'),
dict(type='RandColor'),
dict(type='RandContrast'),
dict(type='RandBrightness'),
dict(type='RandSharpness'),
dict(type='RandPosterize')
])
],
record=True),
dict(type='Pad', size_divisor=32),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='ExtraAttrs', tag='sup'),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels'],
meta_keys=('filename', 'ori_shape', 'img_shape', 'img_norm_cfg',
'pad_shape', 'scale_factor', 'tag'))
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type='SemiDataset',
sup=dict(
type='CocoDataset',
ann_file='data/coco/annotations/instances_train2017.json',
img_prefix='data/coco/train2017/',
classes = ('cat', 'dog'),
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Sequential',
transforms=[
dict(
type='RandResize',
img_scale=[(1333, 400), (1333, 1200)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandFlip', flip_ratio=0.5),
dict(
type='OneOf',
transforms=[
dict(type='Identity'),
dict(type='AutoContrast'),
dict(type='RandEqualize'),
dict(type='RandSolarize'),
dict(type='RandColor'),
dict(type='RandContrast'),
dict(type='RandBrightness'),
dict(type='RandSharpness'),
dict(type='RandPosterize')
])
],
record=True),
dict(type='Pad', size_divisor=32),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='ExtraAttrs', tag='sup'),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels'],
meta_keys=('filename', 'ori_shape', 'img_shape',
'img_norm_cfg', 'pad_shape', 'scale_factor',
'tag'))
],
filter_empty_gt=False),
unsup=dict(
type='CocoDataset',
ann_file='data/coco/annotations/instances_unlabeled2017.json',
img_prefix='data/coco/unlabeled2017/',
classes = ('cat', 'dog'),
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='PseudoSamples', with_bbox=True),
dict(
type='MultiBranch',
unsup_student=[
dict(
type='Sequential',
transforms=[
dict(
type='RandResize',
img_scale=[(1333, 400), (1333, 1200)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandFlip', flip_ratio=0.5),
dict(
type='ShuffledSequential',
transforms=[
dict(
type='OneOf',
transforms=[
dict(type='Identity'),
dict(type='AutoContrast'),
dict(type='RandEqualize'),
dict(type='RandSolarize'),
dict(type='RandColor'),
dict(type='RandContrast'),
dict(type='RandBrightness'),
dict(type='RandSharpness'),
dict(type='RandPosterize')
]),
dict(
type='OneOf',
transforms=[{
'type': 'RandTranslate',
'x': (-0.1, 0.1)
}, {
'type': 'RandTranslate',
'y': (-0.1, 0.1)
}, {
'type': 'RandRotate',
'angle': (-30, 30)
},
[{
'type':
'RandShear',
'x': (-30, 30)
}, {
'type':
'RandShear',
'y': (-30, 30)
}]])
]),
dict(
type='RandErase',
n_iterations=(1, 5),
size=[0, 0.2],
squared=True)
],
record=True),
dict(type='Pad', size_divisor=32),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='ExtraAttrs', tag='unsup_student'),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels'],
meta_keys=('filename', 'ori_shape', 'img_shape',
'img_norm_cfg', 'pad_shape',
'scale_factor', 'tag',
'transform_matrix'))
],
unsup_teacher=[
dict(
type='Sequential',
transforms=[
dict(
type='RandResize',
img_scale=[(1333, 400), (1333, 1200)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandFlip', flip_ratio=0.5)
],
record=True),
dict(type='Pad', size_divisor=32),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='ExtraAttrs', tag='unsup_teacher'),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels'],
meta_keys=('filename', 'ori_shape', 'img_shape',
'img_norm_cfg', 'pad_shape',
'scale_factor', 'tag',
'transform_matrix'))
])
],
filter_empty_gt=False)),
val=dict(
type='CocoDataset',
ann_file='data/coco/annotations/instances_val2017.json',
img_prefix='data/coco/val2017/',
classes = ('cat', 'dog'),
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
],
filter_empty_gt=False),
test=dict(
type='CocoDataset',
ann_file='data/coco/annotations/instances_val2017.json',
img_prefix='data/coco/val2017/',
classes = ('cat', 'dog'),
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]),
sampler=dict(
train=dict(
type='DistributedGroupSemiBalanceSampler',
sample_ratio=[1, 9],
by_prob=True,
epoch_length=2000)))
evaluation = dict(interval=10000, metric='bbox', type='SubModulesDistEvalHook')
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[2000, 4000])
runner = dict(type='IterBasedRunner', max_iters=5000)
checkpoint_config = dict(interval=200, by_epoch=False, max_keep_ckpts=20)
log_config = dict(
interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
custom_hooks = [
dict(type='NumClassCheckHook'),
dict(type='WeightSummary'),
dict(type='MeanTeacher', momentum=0.999, interval=1, warm_up=0)
]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
mmdet_base = '../../thirdparty/mmdetection/configs/_base_'
strong_pipeline = [
dict(
type='Sequential',
transforms=[
dict(
type='RandResize',
img_scale=[(1333, 400), (1333, 1200)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandFlip', flip_ratio=0.5),
dict(
type='ShuffledSequential',
transforms=[
dict(
type='OneOf',
transforms=[
dict(type='Identity'),
dict(type='AutoContrast'),
dict(type='RandEqualize'),
dict(type='RandSolarize'),
dict(type='RandColor'),
dict(type='RandContrast'),
dict(type='RandBrightness'),
dict(type='RandSharpness'),
dict(type='RandPosterize')
]),
dict(
type='OneOf',
transforms=[{
'type': 'RandTranslate',
'x': (-0.1, 0.1)
}, {
'type': 'RandTranslate',
'y': (-0.1, 0.1)
}, {
'type': 'RandRotate',
'angle': (-30, 30)
},
[{
'type': 'RandShear',
'x': (-30, 30)
}, {
'type': 'RandShear',
'y': (-30, 30)
}]])
]),
dict(
type='RandErase',
n_iterations=(1, 5),
size=[0, 0.2],
squared=True)
],
record=True),
dict(type='Pad', size_divisor=32),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='ExtraAttrs', tag='unsup_student'),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels'],
meta_keys=('filename', 'ori_shape', 'img_shape', 'img_norm_cfg',
'pad_shape', 'scale_factor', 'tag', 'transform_matrix'))
]
weak_pipeline = [
dict(
type='Sequential',
transforms=[
dict(
type='RandResize',
img_scale=[(1333, 400), (1333, 1200)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandFlip', flip_ratio=0.5)
],
record=True),
dict(type='Pad', size_divisor=32),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='ExtraAttrs', tag='unsup_teacher'),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels'],
meta_keys=('filename', 'ori_shape', 'img_shape', 'img_norm_cfg',
'pad_shape', 'scale_factor', 'tag', 'transform_matrix'))
]
unsup_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='PseudoSamples', with_bbox=True),
dict(
type='MultiBranch',
unsup_student=[
dict(
type='Sequential',
transforms=[
dict(
type='RandResize',
img_scale=[(1333, 400), (1333, 1200)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandFlip', flip_ratio=0.5),
dict(
type='ShuffledSequential',
transforms=[
dict(
type='OneOf',
transforms=[
dict(type='Identity'),
dict(type='AutoContrast'),
dict(type='RandEqualize'),
dict(type='RandSolarize'),
dict(type='RandColor'),
dict(type='RandContrast'),
dict(type='RandBrightness'),
dict(type='RandSharpness'),
dict(type='RandPosterize')
]),
dict(
type='OneOf',
transforms=[{
'type': 'RandTranslate',
'x': (-0.1, 0.1)
}, {
'type': 'RandTranslate',
'y': (-0.1, 0.1)
}, {
'type': 'RandRotate',
'angle': (-30, 30)
},
[{
'type': 'RandShear',
'x': (-30, 30)
}, {
'type': 'RandShear',
'y': (-30, 30)
}]])
]),
dict(
type='RandErase',
n_iterations=(1, 5),
size=[0, 0.2],
squared=True)
],
record=True),
dict(type='Pad', size_divisor=32),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='ExtraAttrs', tag='unsup_student'),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels'],
meta_keys=('filename', 'ori_shape', 'img_shape',
'img_norm_cfg', 'pad_shape', 'scale_factor', 'tag',
'transform_matrix'))
],
unsup_teacher=[
dict(
type='Sequential',
transforms=[
dict(
type='RandResize',
img_scale=[(1333, 400), (1333, 1200)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandFlip', flip_ratio=0.5)
],
record=True),
dict(type='Pad', size_divisor=32),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='ExtraAttrs', tag='unsup_teacher'),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels'],
meta_keys=('filename', 'ori_shape', 'img_shape',
'img_norm_cfg', 'pad_shape', 'scale_factor', 'tag',
'transform_matrix'))
])
]
fp16 = dict(loss_scale='dynamic')
work_dir = './work_dirs/myModel2'
cfg_name = 'soft_teacher_faster_rcnn_r50_caffe_fpn_coco_nodule'
load_from = 'iter_180000.pth'
# gpu_ids = range(0, 1)
pipeline的部分基本可以无视掉,主要修改model和data以及其他的一些参数。
训练
在windows的cmd上,输入python tools/train.py myModel.py
就可以训练了。
预测
在windows的cmd上,输入python demo/image_demo.py 'D:\\learn\\AI\\SoftTeacher-main\\data\\coco\\testJPEG\\*.jpg' myModel2.py work_dirs\\myModel2\\latest.pth --output work_dirs/
就可以预测。
预测结果:
但是有些预测结果不太理想,比如:
可能还需要更多的数据来训练,或者是更多的epoch,会有更好的效果。
遇到的问题
- 配置文件的evaluation,训练到interval的值时候,要进行评估,就会报错,改了好久都不行,所以我直接无视了evaluation。
- data里train的
classes = ('cat', 'dog')
,要分别加到sup和unsup里面,因为这是两个不同的数据集,看源码解读可以知道,train返回两个dataset,所以需要两个classes。 - 预测的时候
glob.glob(args.img)
识别不了,也不知道是什么问题,所以我直接在源文件里注明了路径 - 制作未标注的数据集的json文件,只需要images信息,不需要annotations信息。