AI 实战营基础班_作业二_mmdetection.

临街的小孩

已于 2023-02-11 15:15:13 修改

阅读量177

点赞数

分类专栏： openMMLab 文章标签：深度学习人工智能 Powered by 金山文档

于 2023-02-11 01:00:31 首次发布

本文链接：https://blog.csdn.net/W25679/article/details/128978882

版权

openMMLab 专栏收录该内容

8 篇文章 0 订阅

订阅专栏

安装环境

!pip install openmim
!mim install mmdet

下载模型

!mim download mmdet --config mask_rcnn_x101_32x4d_fpn_2x_coco

修改配置文件，使用继承的方式

mim train mmdet balloon.py

balloon.py

# Inherit every setting from the base config listed here; the sections
# below override only the dataset locations and the number of classes.
_base_ = ['mask_rcnn_x101_32x4d_fpn_2x_coco.py']

# Dataset overrides. Each split points at its annotation JSON and image
# directory and declares the single "balloon" class. The test split reuses
# the validation annotations and images.
data = {
    'train': {
        'ann_file': '/openbayes/home/train_data.json',
        'img_prefix': '/openbayes/input/input0/balloon/train',
        'classes': ("balloon",),
    },
    'val': {
        'ann_file': '/openbayes/home/val_data.json',
        'img_prefix': '/openbayes/input/input0/balloon/val',
        'classes': ("balloon",),
    },
    'test': {
        'ann_file': '/openbayes/home/val_data.json',
        'img_prefix': '/openbayes/input/input0/balloon/val',
        'classes': ("balloon",),
    },
}

# Model overrides: both the box head and the mask head predict one class.
model = {
    'roi_head': {
        'bbox_head': {'num_classes': 1},
        'mask_head': {'num_classes': 1},
    },
}

python data2cocos.py ann_file out_file image_prefix

ann_file 标注文件

out_file 保存文件

image_prefix image路径

data2cocos.py

import os.path as osp
import sys, mmcv

def convert_balloon_to_coco(ann_file, out_file, image_prefix):
    """Convert a balloon annotation file into a COCO-style JSON file.

    The input is loaded with ``mmcv.load`` and is read as a mapping whose
    values each carry a ``filename`` and a ``regions`` mapping. Every region
    holds its polygon vertices under ``shape_attributes`` as the parallel
    lists ``all_points_x`` and ``all_points_y``.

    Args:
        ann_file: Path of the source annotation file.
        out_file: Path the COCO-format dict is written to via ``mmcv.dump``.
        image_prefix: Directory joined with each ``filename`` to locate the
            image. Images are read only to obtain their height and width.

    Raises:
        AssertionError: If a region has non-empty ``region_attributes``.
        OSError: If an image read yields no data.
    """
    data_infos = mmcv.load(ann_file)
    annotations = []
    images = []
    obj_count = 0
    for idx, v in enumerate(mmcv.track_iter_progress(data_infos.values())):
        filename = v['filename']
        img_path = osp.join(image_prefix, filename)
        img = mmcv.imread(img_path)
        # Guard against a failed read so the error names the offending file
        # instead of surfacing as an AttributeError on ``.shape``.
        if img is None:
            raise OSError('failed to read image: {}'.format(img_path))
        height, width = img.shape[:2]
        images.append(dict(
            id=idx,
            file_name=filename,
            height=height,
            width=width))
        for region in v['regions'].values():
            assert not region['region_attributes']
            shape = region['shape_attributes']
            px = shape['all_points_x']
            py = shape['all_points_y']
            # Shift every vertex by half a pixel, then flatten the (x, y)
            # pairs into a single [x0, y0, x1, y1, ...] list.
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
            poly = [coord for point in poly for coord in point]
            x_min, y_min, x_max, y_max = (
                min(px), min(py), max(px), max(py))
            annotations.append(dict(
                image_id=idx,
                id=obj_count,
                category_id=0,
                # Box is [x, y, width, height]; area is the box area, not
                # the polygon area.
                bbox=[x_min, y_min, x_max - x_min, y_max - y_min],
                area=(x_max - x_min) * (y_max - y_min),
                segmentation=[poly],
                iscrowd=0))
            obj_count += 1
    coco_format_json = dict(
        images=images,
        annotations=annotations,
        categories=[{'id':0, 'name': 'balloon'}])
    mmcv.dump(coco_format_json, out_file)

if __name__ == '__main__':
    # Command line: python data2cocos.py ann_file out_file image_prefix
    # Exit with a usage message rather than an IndexError traceback when
    # arguments are missing.
    if len(sys.argv) < 4:
        sys.exit('usage: python {} ann_file out_file image_prefix'.format(
            sys.argv[0]))
    ann_file, out_file, image_prefix = sys.argv[1:4]
    convert_balloon_to_coco(ann_file, out_file, image_prefix)

python mask_video.py test_video.mp4 /openbayes/home/video.mp4 /openbayes/home/work_dirs/balloon/epoch_20.pth balloon.py 0.5

mask_video.py

import os, sys, math, cv2, mmcv, numpy
from mmdet.apis import inference_detector, init_detector

def mask_draw(src_file, dst_file, mod_file, conf_file, threshold, device):
    """Write a copy of a video where only detected instances keep colour.

    Each frame is run through the detector. Pixels covered by a mask of
    class index 0 whose box score is not below ``threshold`` stay as they
    are; every other pixel is replaced by its grayscale value.

    Args:
        src_file: Path of the input video, opened with ``mmcv.VideoReader``.
        dst_file: Path of the output video, encoded with the XVID fourcc.
        mod_file: Checkpoint path passed to ``init_detector``.
        conf_file: Config path passed to ``init_detector``.
        threshold: Detections whose score (column 4 of the box result) is
            below this value are ignored.
        device: Device string passed to ``init_detector``.
    """
    model = init_detector(conf_file, mod_file, device=device)
    score_thr = threshold
    frames = mmcv.VideoReader(src_file)
    fourcc = "XVID"
    # The source frame rate is rounded up to an integer for the writer.
    fps = math.ceil(frames.fps)
    resolution = (frames.width, frames.height)
    vwriter = cv2.VideoWriter(dst_file, cv2.VideoWriter_fourcc(*fourcc), fps, resolution)
    try:
        for fid in mmcv.track_iter_progress(range(len(frames))):
            # Fetch the frame once and reuse it for inference and drawing.
            frame = frames[fid]
            if frame is None:
                continue
            result = inference_detector(model, frame)
            bbox_result, segm_result = result[0], result[1]
            # Start from an all-False mask sized like the frame so a frame
            # without detections is still written (fully gray) rather than
            # dropped, which would shorten the output video.
            # Assumes masks share the frame's height/width, as the original
            # per-pixel indexing did.
            mask = numpy.zeros(frame.shape[:2], dtype=bool)
            for bbox, segm in zip(bbox_result[0], segm_result[0]):
                if bbox[4] < score_thr:
                    continue
                mask = numpy.logical_or(mask, segm)
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Keep the original colour inside the mask and broadcast the
            # gray value over all three channels everywhere else.
            img = numpy.where(mask[..., None], frame, gray[..., None])
            vwriter.write(img)
    finally:
        # Release the writer even if inference fails part-way through.
        vwriter.release()

if __name__ == '__main__':
    # Command line:
    #   python mask_video.py src_file dst_file mod_file conf_file threshold [device]
    # Exit with a usage message rather than an IndexError traceback when
    # required arguments are missing.
    if len(sys.argv) < 6:
        sys.exit('usage: python {} src_file dst_file mod_file conf_file '
                 'threshold [device]'.format(sys.argv[0]))
    src_file, dst_file, mod_file, conf_file = sys.argv[1:5]
    threshold = float(sys.argv[5])
    # The device is optional and defaults to the first CUDA device.
    device = sys.argv[6] if len(sys.argv) > 6 else "cuda:0"
    mask_draw(src_file, dst_file, mod_file, conf_file, threshold, device)