半监督目标检测复现
相关:最近在做半监督目标检测的课题,记录一下当前一些算法的复现。目前仅复现了D2系的unbiased-teacher和改进版mix-unmix算法,以及mmcv系的soft teacher、pseco和Instant-teacher,后续复现成功后会再补充。文中的方式基于ubuntu系统,使用conda搭建环境,数据集采用COCO格式。
D2系: 1-unbiased-teacher
1、根据readme创建基础环境
#create conda env
# 这里原文使用的python为3.6,但由于第二步中d2需要python 3.7以上的版本,所以这里改为3.7
conda create -n detectron2 python=3.7
#activate the environment
conda activate detectron2
#install PyTorch >=1.5 with GPU。这里要和d2所需环境保持一致,所以需要pytorch 1.8以上的版本,具体安装指令参考pytorch官网的历史版本页面。
conda install pytorch torchvision -c pytorch
2、根据detectron2安装d2
这里就按照官网进行安装即可
方法1:python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
#(add --user if you don’t have permission)
#Or, to install it from a local clone:
方法2:git clone https://github.com/facebookresearch/detectron2.git
python -m pip install -e detectron2
#On macOS, you may need to prepend the above commands with a few environment variables:
方法3:CC=clang CXX=clang++ ARCHFLAGS="-arch x86_64" python -m pip install ...
3、安装好上述环境之后,copy一下原文的代码:
https://github.com/facebookresearch/unbiased-teacher
4、训练自己的数据集:
本文的方式是将数据集信息直接添加到代码中的train_net.py
第一步:更改train_net.py
#!/usr/bin/env python3
#Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import os
import detectron2.utils.comm as comm
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.engine import default_argument_parser, default_setup, launch
from ubteacher import add_ubteacher_config
from ubteacher.engine.trainer import UBTeacherTrainer, BaselineTrainer
#hacky way to register
from ubteacher.modeling.meta_arch.rcnn import TwoStagePseudoLabGeneralizedRCNN
from ubteacher.modeling.proposal_generator.rpn import PseudoLabRPN
from ubteacher.modeling.roi_heads.roi_heads import StandardROIHeadsPseudoLab
import ubteacher.data.datasets.builtin
##############################################
# 从这里开始添加注册数据集信息
#############################################
from ubteacher.modeling.meta_arch.ts_ensemble import EnsembleTSModel
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets.coco import load_coco_json
import pycocotools
# Category names attached to every registered split as `thing_classes`.
# "类别" is a placeholder: replace it with your own class names
# (presumably these should match the categories in the COCO JSON -- confirm).
CLASS_NAMES = ["类别"]

# Root directory of the dataset. Left empty here; fill in your own path.
DATASET_ROOT = ""

# Image directories. Training and validation images share one folder here.
TRAIN_PATH = os.path.join(DATASET_ROOT, "images")
VAL_PATH = os.path.join(DATASET_ROOT, "images")

# COCO-format annotation files: labeled train, unlabeled train, validation.
ANN_ROOT = os.path.join(DATASET_ROOT, "annotations/")
TRAIN_LABEL_JSON = os.path.join(ANN_ROOT, "instances_train2014.json")
TRAIN_UNLABEL_JSON = os.path.join(ANN_ROOT, "instances_train2014_unlabel.json")
# Change the file name below (e.g. to "val.json") if your validation
# annotations are named differently.
VAL_JSON = os.path.join(ANN_ROOT, "instances_val2014.json")

# Dataset splits: registered name -> (image directory, annotation JSON).
PREDEFINED_SPLITS_DATASET = {
    "coco_train_label": (TRAIN_PATH, TRAIN_LABEL_JSON),
    "coco_train_unlabel": (TRAIN_PATH, TRAIN_UNLABEL_JSON),
    "coco_val": (VAL_PATH, VAL_JSON),
}
#=============================
#注册数据集和元数据
def plain_register_dataset():
    """Register every split listed in ``PREDEFINED_SPLITS_DATASET``.

    For each ``name -> (image_root, json_file)`` entry this registers a
    loader with ``DatasetCatalog`` that calls ``load_coco_json`` on the
    split's annotation file and image directory, then records the class
    names, evaluator type, annotation path and image root in
    ``MetadataCatalog``.

    The registered names ("coco_train_label", "coco_train_unlabel",
    "coco_val") come from the table, so adding a split only needs a new
    table entry.
    """
    for split_name, (image_root, json_file) in PREDEFINED_SPLITS_DATASET.items():
        # Bind the current paths as lambda defaults; a plain closure would
        # see only the last iteration's values when the loader finally runs.
        DatasetCatalog.register(
            split_name,
            lambda json_file=json_file, image_root=image_root: load_coco_json(
                json_file, image_root
            ),
        )
        MetadataCatalog.get(split_name).set(
            # Author's note: thing_classes is optional, and Chinese class
            # names cannot be displayed -- leave it out if yours are Chinese.
            thing_classes=CLASS_NAMES,
            evaluator_type="coco",  # selects the evaluation method
            json_file=json_file,
            image_root=image_root,
        )
###############################################
def setup(args):
    """Build the frozen config for this run from the parsed CLI arguments.

    Starts from ``get_cfg()``, lets ``add_ubteacher_config`` extend it, then
    merges the YAML file named by ``args.config_file`` followed by the
    ``KEY VALUE`` overrides in ``args.opts``, so command-line overrides are
    applied after the file. The config is frozen before being passed to
    ``default_setup``.

    Args:
        args: Parsed command-line namespace; ``config_file`` and ``opts``
            are read here, and the whole namespace goes to ``default_setup``.

    Returns:
        The merged, frozen config object.
    """
    config = get_cfg()
    add_ubteacher_config(config)

    # Order matters: file values first, then command-line overrides.
    config.merge_from_file(args.config_file)
    config.merge_from_list(args.opts)

    config.freeze()
    default_setup(config, args)
    return config
def main(args):
    """Train or evaluate a model with the trainer selected in the config.

    Args:
        args: Parsed command-line namespace; ``eval_only`` and ``resume``
            are read here, the rest is consumed by ``setup``.

    Returns:
        The result of ``Trainer.test`` when ``args.eval_only`` is set,
        otherwise the result of ``trainer.train()``.

    Raises:
        ValueError: If ``cfg.SEMISUPNET.Trainer`` is neither "ubteacher"
            nor "baseline".
    """
    cfg = setup(args)

    # Register the custom dataset splits before any trainer or model is built.
    plain_register_dataset()

    trainer_name = cfg.SEMISUPNET.Trainer
    if trainer_name not in ("ubteacher", "baseline"):
        raise ValueError("Trainer Name is not found.")
    trainer_cls = UBTeacherTrainer if trainer_name == "ubteacher" else BaselineTrainer

    # Training path: build the trainer, optionally resume, and run.
    if not args.eval_only:
        trainer = trainer_cls(cfg)
        trainer.resume_or_load(resume=args.resume)
        return trainer.train()

    # Evaluation path for "ubteacher": two models are built and wrapped in
    # an EnsembleTSModel so the checkpoint loads into the wrapper; only the
    # wrapper's ``modelTeacher`` is evaluated.
    if trainer_name == "ubteacher":
        second_model = trainer_cls.build_model(cfg)
        teacher_model = trainer_cls.build_model(cfg)
        ensemble = EnsembleTSModel(teacher_model, second_model)
        checkpointer = DetectionCheckpointer(ensemble, save_dir=cfg.OUTPUT_DIR)
        checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=args.resume)
        return trainer_cls.test(cfg, ensemble.modelTeacher)

    # Evaluation path for "baseline": a single model is loaded and tested.
    single_model = trainer_cls.build_model(cfg)
    checkpointer = DetectionCheckpointer(single_model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=args.resume)
    return trainer_cls.test(cfg, single_model)
if __name__ == "__main__":
    # Parse the command line with detectron2's default argument parser.
    parser = default_argument_parser()
    args = parser.parse_args()
    print("Command Line Args:", args)

    # Hand `main` to detectron2's launcher with the GPU/machine settings
    # from the command line; `args` is forwarded as main's only argument.
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )
第二步,修改/mnt/sdd/zhanghexiang/unbiased-teacher/configs/coco_supervision/faster_rcnn_R_50_FPN_sup10_run1.yaml配置文件,将训练数据集的来源由原来的
替换为:
注意:这里的名字要和train_net.py里面注册的名字保持一致。然后就可以按照readme里面的指令进行训练了。我用的是2张卡,所以指令如下:
根据显存大小调整一下batch size即可。
CUDA_VISIBLE_DEVICES=3,4 nohup python train_net.py --num-gpus 2 --config configs/coco_supervision/faster_rcnn_R_50_FPN_sup10_run1.yaml SOLVER.IMG_PER_BATCH_LABEL 4 SOLVER.IMG_PER_BATCH_UNLABEL 4