数据标注
- w:开始选择区域
- a:上一张图片
- d:下一张图片
MMDetection
项目下载
环境创建
# 创建conda
conda create -n open-mmlab python=3.7 -y
conda activate open-mmlab
# pytorch 安装 # 也可以直接迁移一个环境conda create -n New --clone Old
conda install pytorch torchvision -c pytorch # conda install pytorch cudatoolkit=10.1 torchvision -c pytorch
# 环境安装
pip install openmim
mim install mmdet
# 报错处理,安装验证等详见 https://github.com/open-mmlab/mmdetection/blob/master/docs/zh_cn/get_started.md/#Installation
简单示例
#%%
from mmdet.apis import init_detector, inference_detector, show_result_pyplot
import mmcv
#%% The .pth checkpoint file must be downloaded from the url below first
config_file = '../configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
# download the checkpoint from model zoo and put it in `checkpoints/`
# url: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth
checkpoint_file = '../checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'
#%%
# build the model from a config file and a checkpoint file
# NOTE(review): requires a CUDA device; use device='cpu' if no GPU is available
model = init_detector(config_file, checkpoint_file, device='cuda:0')
#%%
# test a single image
img = 'demo.jpg'
result = inference_detector(model, img)
#%%
# show the results
show_result_pyplot(model, img, result)
有关数据准备的说明
COCO格式数据集
{
"id": 19, #
"width": 1280,
"height": 720,
"file_name": "013351.jpg", # 文件名称
"license": "",
"flickr_url": "",
"coco_url": "",
"date_captured": ""
},
配置文件
- 超级多的配置文件,config中包含所有,训练时只需要运行train语句即可
- 打开fast-rcnn的一个配置文件
模型配置
比如:fast_rcnn引用了'../_base_/models/fast_rcnn_r50_fpn.py'
../_base_/models/fast_rcnn_r50_fpn.py的配置为:
# model settings — Fast R-CNN (second stage only; consumes precomputed
# proposals). NOTE(review): this excerpt is flattened; the upstream config
# file indents the nested dicts.
model = dict(
type='FastRCNN',
backbone=dict( # implementations live under the model package's backbones folder
type='ResNet',# the 'type' string is registered in that package's __init__
depth=50,# the entries below are the backbone's parameters
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch',
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False)),
test_cfg=dict(
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100)))
数据配置
# dataset settings — NOTE(review): fragment; `data_root`, `train_pipeline`
# and `test_pipeline` are defined elsewhere in the full config file
data = dict(
samples_per_gpu=2,# batch size per GPU; effective batch = samples_per_gpu * num_gpus
workers_per_gpu=2,# number of dataloader worker processes per GPU
train=dict(
proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl',
pipeline=train_pipeline),
val=dict(
proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',
pipeline=test_pipeline),
test=dict(
proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',
pipeline=test_pipeline))
最基础的coco配置
# dataset settings
dataset_type = 'CocoDataset'# which registered dataset class to use
data_root = 'data/coco/'# root path of the dataset
# per-channel normalization statistics applied by the 'Normalize' pipeline step
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# data processing pipeline applied to training samples
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),# keeps aspect ratio; (1333, 800) is a size limit, not the exact resize target
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),# data fields gathered for training
]
# data processing pipeline applied at test time (no annotations loaded)
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
# train/val/test dataset definitions built from the settings above
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
pipeline=train_pipeline),
val=dict(
type=dataset_type, # dataset_type defined above is used here
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline))
evaluation = dict(interval=1, metric='bbox')# validate every epoch; evaluate with the COCO 'bbox' metric
ann_file
训练设置
# model training and testing settings
# NOTE(review): this is a fragment copied from inside `model = dict(...)` —
# the trailing comma after train_cfg and the extra closing paren mean it is
# not valid standalone Python; it duplicates the block shown earlier.
train_cfg=dict(
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False)),
test_cfg=dict(
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100)))
参考与更多
https://www.bilibili.com/video/BV1Hp4y1y788?
MMDetection中文文档—详解
MMDetection中文文档—1.安装
MMDetection中文文档—2.入门