mmdetection -- ignore -- training on pure-background images

mmdetection/mmdet/datasets/coco.py

How ignore is handled: if an annotation in instances_train2017.json carries an ignore flag, its bbox and the corresponding segm are skipped with a continue:

    def _parse_ann_info(self, ann_info, with_mask=True):
        """Parse bbox and mask annotation.

        Args:
            ann_info (list[dict]): Annotation info of an image.
            with_mask (bool): Whether to parse mask annotations.

        Returns:
            dict: A dict containing the following keys: bboxes, bboxes_ignore,
                labels, masks, mask_polys, poly_lens.
        """
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []
        # Two formats are provided.
        # 1. mask: a binary map of the same size of the image.
        # 2. polys: each mask consists of one or several polys, each poly is a
        # list of float.
        if with_mask:
            gt_masks = []
            gt_mask_polys = []
            gt_poly_lens = []
        for i, ann in enumerate(ann_info):
            if ann.get('ignore', False):
                continue
            x1, y1, w, h = ann['bbox']
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
            bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
            if ann['iscrowd']:
                gt_bboxes_ignore.append(bbox)
            else:
                gt_bboxes.append(bbox)
                gt_labels.append(self.cat2label[ann['category_id']])

Then:

mmdetection/mmdet/datasets/custom.py

    def prepare_train_img(self, idx):
        img_info = self.img_infos[idx]
        # load image
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))

        # corruption
        if self.corruption is not None:
            img = corrupt(
                img,
                severity=self.corruption_severity,
                corruption_name=self.corruption)
        # load proposals if necessary
        if self.proposals is not None:
            proposals = self.proposals[idx][:self.num_max_proposals]
            # TODO: Handle empty proposals properly. Currently images with
            # no proposals are just ignored, but they can be used for
            # training in concept.
            if len(proposals) == 0:
                return None
            if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposals.shape))
            if proposals.shape[1] == 5:
                scores = proposals[:, 4, None]
                proposals = proposals[:, :4]
            else:
                scores = None

        ann = self.get_ann_info(idx)
        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']
        if self.with_crowd:
            gt_bboxes_ignore = ann['bboxes_ignore']

        # skip the image if there is no valid gt bbox
        if len(gt_bboxes) == 0 and self.skip_img_without_anno:
            warnings.warn('Skip the image "%s" that has no valid gt bbox' %
                          osp.join(self.img_prefix, img_info['filename']))
            return None

        # extra augmentation
        if self.extra_aug is not None:
            img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes,
                                                       gt_labels)

        # apply transforms
        flip = True if np.random.rand() < self.flip_ratio else False
        # randomly sample a scale
        img_scale = random_scale(self.img_scales, self.multiscale_mode)
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
        img = img.copy()
        if self.with_seg:
            gt_seg = mmcv.imread(
                osp.join(self.seg_prefix,
                         img_info['filename'].replace('jpg', 'png')),
                flag='unchanged')
            gt_seg = self.seg_transform(gt_seg.squeeze(), img_scale, flip)
            gt_seg = mmcv.imrescale(
                gt_seg, self.seg_scale_factor, interpolation='nearest')
            gt_seg = gt_seg[None, ...]
        if self.proposals is not None:
            proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                            flip)
            proposals = np.hstack([proposals, scores
                                   ]) if scores is not None else proposals
        gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
                                        flip)
        if self.with_crowd:
            gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                                   scale_factor, flip)
        if self.with_mask:
            gt_masks = self.mask_transform(ann['masks'], pad_shape,
                                           scale_factor, flip)

Some augmented samples that feed pure background in (note the empty bbox and segmentation, with a placeholder area of 1.0):

  {
   "id": 50,
   "image_id": 47,
   "category_id": 1,
   "segmentation": [],
   "bbox": [],
   "iscrowd": 0,
   "area": 1.0
  },
  {
   "id": 51,
   "image_id": 48,
   "category_id": 1,
   "segmentation": [
    [
     61,
     72,
     59,
     78,
     58,
     84,
     58,
     89,
     61,
     92,
     64,
     90,
     67,
     83,
     68,
     77,
     66,
     72
    ]
   ],
   "bbox": [
    58,
    72,
    10,
    20
   ],
   "iscrowd": 0,
   "area": 1.0
  },
  {
   "id": 52,
   "image_id": 49,
   "category_id": 1,
   "segmentation": [],
   "bbox": [],
   "iscrowd": 0,
   "area": 1.0
  },
  {
   "id": 53,
   "image_id": 50,
   "category_id": 1,
   "segmentation": [],
   "bbox": [],
   "iscrowd": 0,
   "area": 1.0
  },
  {
   "id": 54,
   "image_id": 51,
   "category_id": 1,
   "segmentation": [],
   "bbox": [],
   "iscrowd": 0,
   "area": 1.0
  }

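These placeholder entries can be generated mechanically. A minimal sketch (a hypothetical helper, not part of any existing converter) that emits one such annotation per background image:

    # Hypothetical helper: a placeholder annotation marks a pure-background
    # image -- empty bbox/segmentation, iscrowd 0, and a dummy area of 1.0,
    # matching the JSON samples above.
    def make_background_ann(ann_id, image_id, category_id=1):
        return {
            'id': ann_id,
            'image_id': image_id,
            'category_id': category_id,
            'segmentation': [],
            'bbox': [],
            'iscrowd': 0,
            'area': 1.0,
        }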
mmdetection/mmdet/datasets/coco.py

Modified:

    def _parse_ann_info(self, ann_info, with_mask=True):
        """Parse bbox and mask annotation.

        Args:
            ann_info (list[dict]): Annotation info of an image.
            with_mask (bool): Whether to parse mask annotations.

        Returns:
            dict: A dict containing the following keys: bboxes, bboxes_ignore,
                labels, masks, mask_polys, poly_lens.
        """
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []
        # Two formats are provided.
        # 1. mask: a binary map of the same size of the image.
        # 2. polys: each mask consists of one or several polys, each poly is a
        # list of float.
        if with_mask:
            gt_masks = []
            gt_mask_polys = []
            gt_poly_lens = []

        # a single annotation whose bbox is empty marks a pure-background image
        if len(ann_info) == 1 and len(ann_info[0]['bbox']) == 0:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

            ann = dict(
                bboxes=gt_bboxes,
                labels=gt_labels,
                bboxes_ignore=gt_bboxes_ignore)

            if with_mask:
                ann['masks'] = gt_masks
                # poly format is not used in the current implementation
                ann['mask_polys'] = gt_mask_polys
                ann['poly_lens'] = gt_poly_lens
            return ann
        else:
            for i, ann in enumerate(ann_info):
                if ann.get('ignore', False):
                    continue
                x1, y1, w, h = ann['bbox']
                if ann['area'] <= 0 or w < 1 or h < 1:
                    continue
                bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
                if ann['iscrowd']:
                    gt_bboxes_ignore.append(bbox)
                else:
                    gt_bboxes.append(bbox)
                    gt_labels.append(self.cat2label[ann['category_id']])
                if with_mask:
                    gt_masks.append(self.coco.annToMask(ann))
                    mask_polys = [
                        p for p in ann['segmentation'] if len(p) >= 6
                    ]  # valid polygons have >= 3 points (6 coordinates)
                    poly_lens = [len(p) for p in mask_polys]
                    gt_mask_polys.append(mask_polys)
                    gt_poly_lens.extend(poly_lens)
            if gt_bboxes:
                gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
                gt_labels = np.array(gt_labels, dtype=np.int64)
            else:
                gt_bboxes = np.zeros((0, 4), dtype=np.float32)
                gt_labels = np.array([], dtype=np.int64)

            if gt_bboxes_ignore:
                gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
            else:
                gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

            ann = dict(
                bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)

            if with_mask:
                ann['masks'] = gt_masks
                # poly format is not used in the current implementation
                ann['mask_polys'] = gt_mask_polys
                ann['poly_lens'] = gt_poly_lens
            return ann
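With this branch in place, a background-only ann_info parses to empty ground truth. A quick illustrative check (dataset stands for any constructed CocoDataset instance; the call itself is the point):

    # Illustrative: a single empty-bbox annotation yields empty gt arrays.
    ann = dataset._parse_ann_info(
        [dict(bbox=[], segmentation=[], iscrowd=0, area=1.0, category_id=1)])
    assert ann['bboxes'].shape == (0, 4)
    assert ann['labels'].size == 0
    assert ann['masks'] == []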

mmdetection/mmdet/datasets/custom.py

    def prepare_train_img(self, idx):
        img_info = self.img_infos[idx]
        # load image
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))

        ### yangninghua: (experiment) convert to grayscale and back
        #import cv2
        #img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        #img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        # corruption
        if self.corruption is not None:
            img = corrupt(
                img,
                severity=self.corruption_severity,
                corruption_name=self.corruption)
        # load proposals if necessary
        if self.proposals is not None:
            proposals = self.proposals[idx][:self.num_max_proposals]
            # TODO: Handle empty proposals properly. Currently images with
            # no proposals are just ignored, but they can be used for
            # training in concept.
            if len(proposals) == 0:
                return None
            if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposals.shape))
            if proposals.shape[1] == 5:
                scores = proposals[:, 4, None]
                proposals = proposals[:, :4]
            else:
                scores = None

        ann = self.get_ann_info(idx)
        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']
        if self.with_crowd:
            gt_bboxes_ignore = ann['bboxes_ignore']

        # # skip the image if there is no valid gt bbox
        # if len(gt_bboxes) == 0 and self.skip_img_without_anno:
        #     warnings.warn('Skip the image "%s" that has no valid gt bbox' %
        #                   osp.join(self.img_prefix, img_info['filename']))
        #     return None
        # skip the image if there is no valid gt bbox
        if len(gt_bboxes) == 0 and self.skip_img_without_anno:
            # apply transforms
            flip = True if np.random.rand() < self.flip_ratio else False
            # randomly sample a scale
            img_scale = random_scale(self.img_scales, self.multiscale_mode)
            img, img_shape, pad_shape, scale_factor = self.img_transform(
                img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
            img = img.copy()

            if self.with_seg:
                gt_seg = mmcv.imread(
                    osp.join(self.seg_prefix,
                             img_info['filename'].replace('jpg', 'png')),
                    flag='unchanged')
                gt_seg = self.seg_transform(gt_seg.squeeze(), img_scale, flip)
                gt_seg = mmcv.imrescale(
                    gt_seg, self.seg_scale_factor, interpolation='nearest')
                gt_seg = gt_seg[None, ...]

            if self.proposals is not None:
                proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                                flip)
                proposals = np.hstack([proposals, scores
                                       ]) if scores is not None else proposals

            ori_shape = (img_info['height'], img_info['width'], 3)
            img_meta = dict(
                ori_shape=ori_shape,
                img_shape=img_shape,
                pad_shape=pad_shape,
                scale_factor=scale_factor,
                flip=flip)

            data = dict(
                img=DC(to_tensor(img), stack=True),
                img_meta=DC(img_meta, cpu_only=True),
                gt_bboxes=DC(to_tensor(gt_bboxes)))
            if self.proposals is not None:
                data['proposals'] = DC(to_tensor(proposals))
            if self.with_label:
                data['gt_labels'] = DC(to_tensor(gt_labels))
            if self.with_crowd:
                data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
            if self.with_mask:
                data['gt_masks'] = DC(ann['masks'], cpu_only=True)
            if self.with_seg:
                data['gt_semantic_seg'] = DC(to_tensor(gt_seg), stack=True)
            return data

        # extra augmentation
        if self.extra_aug is not None:
            img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes,
                                                       gt_labels)

        # apply transforms
        flip = True if np.random.rand() < self.flip_ratio else False
        # randomly sample a scale
        img_scale = random_scale(self.img_scales, self.multiscale_mode)
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
        img = img.copy()
        if self.with_seg:
            gt_seg = mmcv.imread(
                osp.join(self.seg_prefix,
                         img_info['filename'].replace('jpg', 'png')),
                flag='unchanged')
            gt_seg = self.seg_transform(gt_seg.squeeze(), img_scale, flip)
            gt_seg = mmcv.imrescale(
                gt_seg, self.seg_scale_factor, interpolation='nearest')
            gt_seg = gt_seg[None, ...]
        if self.proposals is not None:
            proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                            flip)
            proposals = np.hstack([proposals, scores
                                   ]) if scores is not None else proposals
        gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
                                        flip)
        if self.with_crowd:
            gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                                   scale_factor, flip)
        if self.with_mask:
            gt_masks = self.mask_transform(ann['masks'], pad_shape,
                                           scale_factor, flip)

        ori_shape = (img_info['height'], img_info['width'], 3)
        img_meta = dict(
            ori_shape=ori_shape,
            img_shape=img_shape,
            pad_shape=pad_shape,
            scale_factor=scale_factor,
            flip=flip)

        data = dict(
            img=DC(to_tensor(img), stack=True),
            img_meta=DC(img_meta, cpu_only=True),
            gt_bboxes=DC(to_tensor(gt_bboxes)))
        if self.proposals is not None:
            data['proposals'] = DC(to_tensor(proposals))
        if self.with_label:
            data['gt_labels'] = DC(to_tensor(gt_labels))
        if self.with_crowd:
            data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
        if self.with_mask:
            data['gt_masks'] = DC(gt_masks, cpu_only=True)
        if self.with_seg:
            data['gt_semantic_seg'] = DC(to_tensor(gt_seg), stack=True)
        return data

Control then flows through __getitem__. With the change above, prepare_train_img no longer returns None for background images, so the resampling loop below never triggers for them:

    def __getitem__(self, idx):
        if self.test_mode:
            return self.prepare_test_img(idx)
        while True:
            data = self.prepare_train_img(idx)
            if data is None:
                idx = self._rand_another(idx)
                continue
            return data
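For reference, when prepare_train_img does return None, _rand_another in the same file draws a replacement index from the same aspect-ratio group (quoted from the mmdetection v1.x source; double-check against your checkout):

    def _rand_another(self, idx):
        # indices whose group flag matches the current image
        pool = np.where(self.flag == self.flag[idx])[0]
        return np.random.choice(pool)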

The main flow at training time:

mmdetection/tools/train.py

mmdetection/mmdet/apis/train.py

mmcv/runner/runner.py

    def run(self, data_loaders, workflow, max_epochs, **kwargs):
        """Start running.

        Args:
            data_loaders (list[:obj:`DataLoader`]): Dataloaders for training
                and validation.
            workflow (list[tuple]): A list of (phase, epochs) to specify the
                running order and epochs. E.g, [('train', 2), ('val', 1)] means
                running 2 epochs for training and 1 epoch for validation,
                iteratively.
            max_epochs (int): Total training epochs.
        """
        assert isinstance(data_loaders, list)
        assert mmcv.is_list_of(workflow, tuple)
        assert len(data_loaders) == len(workflow)

        self._max_epochs = max_epochs
        work_dir = self.work_dir if self.work_dir is not None else 'NONE'
        self.logger.info('Start running, host: %s, work_dir: %s',
                         get_host_info(), work_dir)
        self.logger.info('workflow: %s, max: %d epochs', workflow, max_epochs)
        self.call_hook('before_run')

        while self.epoch < max_epochs:
            for i, flow in enumerate(workflow):
                mode, epochs = flow
                if isinstance(mode, str):  # self.train()
                    if not hasattr(self, mode):
                        raise ValueError(
                            'runner has no method named "{}" to run an epoch'.
                            format(mode))
                    epoch_runner = getattr(self, mode)
                elif callable(mode):  # custom train()
                    epoch_runner = mode
                else:
                    raise TypeError('mode in workflow must be a str or '
                                    'callable function, not {}'.format(
                                        type(mode)))
                for _ in range(epochs):
                    if mode == 'train' and self.epoch >= max_epochs:
                        return
                    epoch_runner(data_loaders[i], **kwargs)

        time.sleep(1)  # wait for some hooks like loggers to finish
        self.call_hook('after_run')
epoch_runner(data_loaders[i], **kwargs)

mmcv/runner/runner.py

    def train(self, data_loader, **kwargs):
        self.model.train()
        self.mode = 'train'
        self.data_loader = data_loader
        self._max_iters = self._max_epochs * len(data_loader)
        self.call_hook('before_train_epoch')
        for i, data_batch in enumerate(data_loader):
            self._inner_iter = i
            self.call_hook('before_train_iter')
            outputs = self.batch_processor(
                self.model, data_batch, train_mode=True, **kwargs)
            if not isinstance(outputs, dict):
                raise TypeError('batch_processor() must return a dict')
            if 'log_vars' in outputs:
                self.log_buffer.update(outputs['log_vars'],
                                       outputs['num_samples'])
            self.outputs = outputs
            self.call_hook('after_train_iter')
            self._iter += 1

        self.call_hook('after_train_epoch')
        self._epoch += 1
outputs = self.batch_processor(
    self.model, data_batch, train_mode=True, **kwargs)

mmdetection/mmdet/apis/train.py

def batch_processor(model, data, train_mode):
    losses = model(**data)
    loss, log_vars = parse_losses(losses)

    outputs = dict(
        loss=loss, log_vars=log_vars, num_samples=len(data['img'].data))

    return outputs
losses = model(**data)

torch/nn/modules/module.py

    def __call__(self, *input, **kwargs):
        for hook in self._forward_pre_hooks.values():
            hook(self, input)
        if torch._C._get_tracing_state():
            result = self._slow_forward(*input, **kwargs)
        else:
            result = self.forward(*input, **kwargs)
        for hook in self._forward_hooks.values():
            hook_result = hook(self, input, result)
            if hook_result is not None:
                raise RuntimeError(
                    "forward hooks should never return any values, but '{}'"
                    "didn't return None".format(hook))
        if len(self._backward_hooks) > 0:
            var = result
            while not isinstance(var, torch.Tensor):
                if isinstance(var, dict):
                    var = next((v for v in var.values() if isinstance(v, torch.Tensor)))
                else:
                    var = var[0]
            grad_fn = var.grad_fn
            if grad_fn is not None:
                for hook in self._backward_hooks.values():
                    wrapper = functools.partial(hook, self)
                    functools.update_wrapper(wrapper, hook)
                    grad_fn.register_hook(wrapper)
        return result
result = self.forward(*input, **kwargs)

torch/nn/parallel/data_parallel.py

    def forward(self, *inputs, **kwargs):
        if not self.device_ids:
            return self.module(*inputs, **kwargs)

        for t in chain(self.module.parameters(), self.module.buffers()):
            if t.device != self.src_device_obj:
                raise RuntimeError("module must have its parameters and buffers "
                                   "on device {} (device_ids[0]) but found one of "
                                   "them on device: {}".format(self.src_device_obj, t.device))

        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
        if len(self.device_ids) == 1:
            return self.module(*inputs[0], **kwargs[0])
        replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
        outputs = self.parallel_apply(replicas, inputs, kwargs)
        return self.gather(outputs, self.output_device)
return self.module(*inputs[0], **kwargs[0])

torch/nn/modules/module.py

    (same __call__ as quoted above; it dispatches to the module's forward again)
result = self.forward(*input, **kwargs)

mmdetection/mmdet/core/fp16/decorators.py

 def auto_fp16_wrapper(old_func):

        @functools.wraps(old_func)
        def new_func(*args, **kwargs):
            # check if the module has set the attribute `fp16_enabled`, if not,
            # just fallback to the original method.
            if not isinstance(args[0], torch.nn.Module):
                raise TypeError('@auto_fp16 can only be used to decorate the '
                                'method of nn.Module')
            if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
                return old_func(*args, **kwargs)
            # get the arg spec of the decorated method
            args_info = getfullargspec(old_func)
            # get the argument names to be casted
            args_to_cast = args_info.args if apply_to is None else apply_to
            # convert the args that need to be processed
            new_args = []
            # NOTE: default args are not taken into consideration
            if args:
                arg_names = args_info.args[:len(args)]
                for i, arg_name in enumerate(arg_names):
                    if arg_name in args_to_cast:
                        new_args.append(
                            cast_tensor_type(args[i], torch.float, torch.half))
                    else:
                        new_args.append(args[i])
            # convert the kwargs that need to be processed
            new_kwargs = {}
            if kwargs:
                for arg_name, arg_value in kwargs.items():
                    if arg_name in args_to_cast:
                        new_kwargs[arg_name] = cast_tensor_type(
                            arg_value, torch.float, torch.half)
                    else:
                        new_kwargs[arg_name] = arg_value
            # apply converted arguments to the decorated method
            output = old_func(*new_args, **new_kwargs)
            # cast the results back to fp32 if necessary
            if out_fp32:
                output = cast_tensor_type(output, torch.half, torch.float)
            return output

        return new_func

    return auto_fp16_wrapper
losses = model(**data)

mmdetection/mmdet/models/detectors/base.py

    @auto_fp16(apply_to=('img', ))
    def forward(self, img, img_meta, return_loss=True, **kwargs):
        if return_loss:
            return self.forward_train(img, img_meta, **kwargs)
        else:
            return self.forward_test(img, img_meta, **kwargs)
return self.forward_train(img, img_meta, **kwargs)

mmdetection/mmdet/models/detectors/two_stage.py

    def forward_train(self,
                      img,
                      img_meta,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
                      gt_masks=None,
                      proposals=None):
        x = self.extract_feat(img)

        losses = dict()

        # RPN forward and loss
        if self.with_rpn:
            rpn_outs = self.rpn_head(x)
            rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
                                          self.train_cfg.rpn)
            rpn_losses = self.rpn_head.loss(
                *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
            losses.update(rpn_losses)

            proposal_cfg = self.train_cfg.get('rpn_proposal',
                                              self.test_cfg.rpn)
            proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
            proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
        else:
            proposal_list = proposals

        # assign gts and sample proposals
        if self.with_bbox or self.with_mask:
            bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)
            bbox_sampler = build_sampler(
                self.train_cfg.rcnn.sampler, context=self)
            num_imgs = img.size(0)
            if gt_bboxes_ignore is None:
                gt_bboxes_ignore = [None for _ in range(num_imgs)]
            sampling_results = []
            for i in range(num_imgs):
                assign_result = bbox_assigner.assign(proposal_list[i],
                                                     gt_bboxes[i],
                                                     gt_bboxes_ignore[i],
                                                     gt_labels[i])
                sampling_result = bbox_sampler.sample(
                    assign_result,
                    proposal_list[i],
                    gt_bboxes[i],
                    gt_labels[i],
                    feats=[lvl_feat[i][None] for lvl_feat in x])
                sampling_results.append(sampling_result)

        # bbox head forward and loss
        if self.with_bbox:
            rois = bbox2roi([res.bboxes for res in sampling_results])
            # TODO: a more flexible way to decide which feature maps to use
            bbox_feats = self.bbox_roi_extractor(
                x[:self.bbox_roi_extractor.num_inputs], rois)
            if self.with_shared_head:
                bbox_feats = self.shared_head(bbox_feats)
            cls_score, bbox_pred = self.bbox_head(bbox_feats)

            bbox_targets = self.bbox_head.get_target(sampling_results,
                                                     gt_bboxes, gt_labels,
                                                     self.train_cfg.rcnn)
            loss_bbox = self.bbox_head.loss(cls_score, bbox_pred,
                                            *bbox_targets)
            losses.update(loss_bbox)

        # mask head forward and loss
        if self.with_mask:
            if not self.share_roi_extractor:
                pos_rois = bbox2roi(
                    [res.pos_bboxes for res in sampling_results])
                mask_feats = self.mask_roi_extractor(
                    x[:self.mask_roi_extractor.num_inputs], pos_rois)
                if self.with_shared_head:
                    mask_feats = self.shared_head(mask_feats)
            else:
                pos_inds = []
                device = bbox_feats.device
                for res in sampling_results:
                    pos_inds.append(
                        torch.ones(
                            res.pos_bboxes.shape[0],
                            device=device,
                            dtype=torch.uint8))
                    pos_inds.append(
                        torch.zeros(
                            res.neg_bboxes.shape[0],
                            device=device,
                            dtype=torch.uint8))
                pos_inds = torch.cat(pos_inds)
                mask_feats = bbox_feats[pos_inds]
            mask_pred = self.mask_head(mask_feats)

            mask_targets = self.mask_head.get_target(sampling_results,
                                                     gt_masks,
                                                     self.train_cfg.rcnn)
            pos_labels = torch.cat(
                [res.pos_gt_labels for res in sampling_results])
            loss_mask = self.mask_head.loss(mask_pred, mask_targets,
                                            pos_labels)
            losses.update(loss_mask)

        return losses
# RPN forward and loss
if self.with_rpn:
    rpn_outs = self.rpn_head(x)
    rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
                                  self.train_cfg.rpn)
    rpn_losses = self.rpn_head.loss(
        *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)

mmdetection/mmdet/models/anchor_heads/rpn_head.py

    def loss(self,
             cls_scores,
             bbox_preds,
             gt_bboxes,
             img_metas,
             cfg,
             gt_bboxes_ignore=None):
        losses = super(RPNHead, self).loss(
            cls_scores,
            bbox_preds,
            gt_bboxes,
            None,
            img_metas,
            cfg,
            gt_bboxes_ignore=gt_bboxes_ignore)
        return dict(
            loss_rpn_cls=losses['loss_cls'], loss_rpn_bbox=losses['loss_bbox'])

This is followed by (abridged traceback):

  File "mmdetection/mmdet/core/fp16/decorators.py", line 127, in new_func
    return old_func(*args, **kwargs)
  File "mmdetection/mmdet/models/anchor_heads/anchor_head.py", line 179, in loss
    sampling=self.sampling)
  File "mmdetection/mmdet/core/anchor/anchor_target.py", line 63, in anchor_target
    unmap_outputs=unmap_outputs)
  File "mmdetection/mmdet/core/utils/misc.py", line 24, in multi_apply
    return tuple(map(list, zip(*map_results)))
  File "mmdetection/mmdet/core/anchor/anchor_target.py", line 116, in anchor_target_single
    anchors, gt_bboxes, gt_bboxes_ignore, None, cfg)
  File "mmdetection/mmdet/core/bbox/assign_sampling.py", line 30, in assign_and_sample
    gt_labels)
  File "mmdetection/mmdet/core/bbox/assigners/max_iou_assigner.py", line 74, in assign
    raise ValueError('No gt or bboxes')
ValueError: No gt or bboxes
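The failing guard sits at the top of MaxIoUAssigner.assign. In the mmdetection v1.x source it reads roughly as follows (abridged):

    # mmdet/core/bbox/assigners/max_iou_assigner.py (v1.x, abridged)
    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        if bboxes.shape[0] == 0 or gt_bboxes.shape[0] == 0:
            raise ValueError('No gt or bboxes')

So an image with an empty gt_bboxes tensor aborts training here unless the assigner itself is patched to tolerate it.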

The call chain: two_stage.py -> rpn_head.py -> anchor_head.py -> anchor_target.py
After all this work, it turns out the feature has already been developed upstream.

See this mmdetection PR: https://github.com/open-mmlab/mmdetection/pull/1531

https://github.com/open-mmlab/mmdetection/issues/425

Just use it directly, but make sure you are on the latest master.

https://github.com/open-mmlab/mmdetection/issues/2014

If you are using the COCO dataset, you need to set filter_empty_gt = False. See the two links below for the implementation:

https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/custom.py#L44

https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/coco.py#L52
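A minimal config sketch showing where the flag goes (paths and pipeline are illustrative; filter_empty_gt is the only point here):

    # Hypothetical dataset config fragment.
    data = dict(
        train=dict(
            type='CocoDataset',
            ann_file='data/coco/annotations/instances_train2017.json',
            img_prefix='data/coco/train2017/',
            pipeline=train_pipeline,   # assumed defined earlier in the config
            filter_empty_gt=False))    # keep images without valid gt boxes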

My own annotation format, converted to labelme and then to COCO.

Unlabeled images are kept as background samples for training: each gets an image_id,

but no ann.

# -*- coding: utf-8 -*-
import sys, getopt
import os
import json
import cv2
import random
import numpy as np
np.random.seed(41)
import glob
import shutil

def mkdir_os(path):
    if not os.path.exists(path):
        os.makedirs(path)

# labelme-to-COCO conversion
class Lableme2CoCo:
    def __init__(self, classname_to_id, jpgpng):
        self.images = []
        self.annotations = []
        self.categories = []
        self.img_id = 0
        self.ann_id = 0
        self.classname_to_id = classname_to_id
        self.jpgpng = jpgpng

    def save_coco_json(self, instance, save_path):
        import io
        # indent=1 keeps the file small; indent=2 is prettier
        with io.open(save_path, 'w', encoding="utf-8") as outfile:
            my_json_str = json.dumps(instance, ensure_ascii=False, indent=1)
            outfile.write(my_json_str)

    # build the COCO instance from the labelme json files
    def to_coco(self, json_path_list, ori_path_new):
        self._init_categories()
        for json_path in json_path_list:
            obj = self.read_jsonfile(json_path)
            self.images.append(self._image(obj, json_path, ori_path_new))
            shapes = obj['shapes']
            # a single shape with no points marks a background image:
            # keep the image entry, but write no annotations for it
            if (len(shapes) == 1) and len(shapes[0]["points"]) == 0:
                self.img_id += 1
                continue
            for shape in shapes:
                annotation = self._annotation(shape)
                self.annotations.append(annotation)
                self.ann_id += 1
            self.img_id += 1

        instance = {}
        instance['info'] = 'spytensor created'
        instance['license'] = ['license']
        instance['images'] = self.images
        instance['annotations'] = self.annotations
        instance['categories'] = self.categories
        return instance

    # build the categories field
    def _init_categories(self):
        for k, v in self.classname_to_id.items():
            category = {}
            category['id'] = v
            category['name'] = k
            self.categories.append(category)

    # build the COCO image field
    def _image(self, obj, path, ori_path):
        image = {}
        # alternative: decode the embedded image instead of reading from disk
        #from labelme import utils
        #img_x = utils.img_b64_to_arr(obj['imageData'])
        name = str(obj['imagePath']).split('/')[-1]
        newname = os.path.join(ori_path,name)
        img_x = cv2.imread(newname)
        if img_x is None:
            print('\nLableme2CoCo--error: cannot read image', newname)
            exit()
        h, w = img_x.shape[:-1]
        image['height'] = h
        image['width'] = w
        image['id'] = self.img_id
        image['file_name'] = os.path.basename(path).replace("json", self.jpgpng)
        return image

    # build the COCO annotation field
    def _annotation(self, shape):
        label = shape['label']
        points = shape['points']
        annotation = {}
        annotation['id'] = self.ann_id
        annotation['image_id'] = self.img_id
        annotation['category_id'] = int(self.classname_to_id[label])
        if len(points) > 0:
            annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
            annotation['bbox'] = self._get_box(points)
        else:
            annotation['segmentation'] = []
            annotation['bbox'] = []
        if int(shape['flags']) == 0:
            annotation['iscrowd'] = 0
            #annotation['ignore'] = 0
        elif int(shape['flags']) == 1:
            annotation['iscrowd'] = 1
            #annotation['ignore'] = 1

        annotation['area'] = 1.0
        return annotation

    # read a json file and return the parsed object
    def read_jsonfile(self, path):
        with open(path, "r") as f:
            return json.load(f)

    # COCO bbox format: [x1, y1, w, h]
    def _get_box(self, points):
        min_x = min_y = np.inf
        max_x = max_y = 0
        for x, y in points:
            min_x = min(min_x, x)
            min_y = min(min_y, y)
            max_x = max(max_x, x)
            max_y = max(max_y, y)
        return [min_x, min_y, max_x - min_x, max_y - min_y]
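        # Sanity check (not part of the script): fed the polygon from
        # annotation id 51 in the sample JSON above -- points (61,72), (59,78),
        # (58,84), (58,89), (61,92), (64,90), (67,83), (68,77), (66,72) --
        # this returns [58, 72, 10, 20], matching that annotation's bbox field.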



def main(argv):

    inputFlag = 1
    process2LabelmeFlag = 1

    if inputFlag:
        inputfile = ''
        jsonfile = ''
        classname = ''
        outputfile = ''

        try:
            opts, args = getopt.getopt(argv,"hi:j:c:o:",["ifile=","jfile=","cname=","ofile="])
        except getopt.GetoptError:
            print('test.py -i <inputfile> -j <jsonfile> -c <classname> -o <outputfile>')
            sys.exit(2)
        for opt, arg in opts:
            if opt == '-h':
                print('test.py -i <inputfile> -j <jsonfile> -c <classname> -o <outputfile>')
                sys.exit()
            elif opt in ("-i", "--ifile"):
                inputfile = arg
            elif opt in ("-j", "--jfile"):
                jsonfile = arg
            elif opt in ("-c", "--cname"):
                classname = arg
            elif opt in ("-o", "--ofile"):
                outputfile = arg
    else:
        inputfile = './seg_marker0123_photoneo'
        jsonfile = 'seg_marker0123_photoneo_20200211_112204.json'
        classname = 'luosi'
        outputfile = './coco'

    print('\nInput directory:', inputfile)
    print('\nInput json:', jsonfile)
    print('\nclassname:', classname)
    print('\nOutput directory:', outputfile)

    save_train = "./seg/train"
    save_test = "./seg/test"
    json_train = "./json/train"
    json_test = "./json/test"
    mkdir_os(save_train)
    mkdir_os(save_test)
    mkdir_os(json_train)
    mkdir_os(json_test)
    mkdir_os(outputfile)

    # 0 is background
    classname_to_id = {classname: 1}

    if process2LabelmeFlag:
        # load the raw data
        data = []
        with open(jsonfile) as f:
            for line in f:
                data.append(json.loads(line))
        num = 0
        count = len(data)
        trainimg = os.listdir(inputfile)
        # iterate over the records
        train_txt = open('train.txt',"w")
        for lab in range(count):
            onedate = data[lab]
            name = onedate["url_image"]
            name = str(name).split("/")[-1]
            jpgpng = name.split(".")[-1]
            if name not in trainimg:
                if name.replace(".jpg",".png") not in trainimg:
                    continue
                else:
                    name = name.replace(".jpg",".png")
                    jpgpng = "png"
            img = cv2.imread(os.path.join(inputfile,name))
            if img is None:
                continue
            temp_hh,temp_ww = img.shape[:2]
            hh = temp_hh
            ww = temp_ww

            # random draw for the train/test split
            tempNum = random.randint(1,10)

            point_size = 3
            thickness = 4
            # unlabeled image: keep it as a background sample
            if (len(onedate["result"]) == 0):
                #continue  (previously, unlabeled images were skipped here)
                json_jpg = {}
                json_jpg["imagePath"] = str(os.path.join(inputfile, name))
                json_jpg["imageData"] = None
                shapes = []

                points = []
                one_shape = {}
                one_shape["line_color"] = None
                one_shape["shape_type"] = "polygon"
                one_shape["points"] = points
                one_shape["flags"] = 0

                one_shape["fill_color"] = None
                one_shape["label"] = classname
                shapes.append(one_shape)

                json_jpg["shapes"] = shapes
                json_jpg["version"] = "3.16.7"
                json_jpg["flags"] = {}
                json_jpg["fillColor"] = [
                    255,
                    0,
                    0,
                    128
                ]
                json_jpg["lineColor"] = [
                    0,
                    255,
                    0,
                    128
                ]
                json_jpg["imageWidth"] = ww
                json_jpg["imageHeight"] = hh
                # jsonData = json.dumps(json_jpg, ensure_ascii=False, indent=1)
                jsonData = json.dumps(json_jpg, indent=1)
                jsonname = name.split(".")[0]
                jsonname = jsonname + ".json"
                # save to the train/test split
                if tempNum == 1 or tempNum == 2:
                    cv2.imwrite(os.path.join(save_test, name), img)
                    fileObject = open(os.path.join(json_test, jsonname), 'w')
                    fileObject.write(jsonData)
                    fileObject.close()
                else:
                    cv2.imwrite(os.path.join(save_train, name), img)
                    fileObject = open(os.path.join(json_train, jsonname), 'w')
                    fileObject.write(jsonData)
                    fileObject.close()

            elif 'data' in onedate["result"] or 'data' in onedate["result"][0]:

                json_jpg={}
                json_jpg["imagePath"] = str(os.path.join(inputfile,name))
                json_jpg["imageData"] = None
                shapes=[]

                for key in range(len(onedate["result"])):
                    ndata = onedate["result"][key]["data"]
                    if len(ndata)< 8:
                        continue


                    points=[]
                    # ignore: drawn in yellow
                    if onedate["result"][key]["tagtype"] in "purpose2":
                        for k in range(len(ndata)//2):
                            cv2.circle(img, (ndata[2*k],ndata[2*k+1]), point_size, (0, 255, 255), thickness)
                            points.append([ndata[2*k],ndata[2*k+1]])
                    # add: drawn in red
                    elif onedate["result"][key]["tagtype"] in "purpose1":
                        for k in range(len(ndata)//2):
                            cv2.circle(img, (ndata[2*k],ndata[2*k+1]), point_size, (0, 0, 255), thickness)
                            points.append([ndata[2*k],ndata[2*k+1]])
                    # special case: drawn in blue
                    else:
                        for k in range(len(ndata)//2):
                            cv2.circle(img, (ndata[2*k],ndata[2*k+1]), point_size, (255, 0, 0), thickness)
                            points.append([ndata[2*k],ndata[2*k+1]])

                    one_shape = {}
                    one_shape["line_color"] = None
                    one_shape["shape_type"] = "polygon"
                    one_shape["points"] = points
                    # set the flag: ignore and special cases get 1, add gets 0
                    # ignore
                    if onedate["result"][key]["tagtype"] in "purpose2":
                        one_shape["flags"] = 1
                    # add
                    elif onedate["result"][key]["tagtype"] in "purpose1":
                        one_shape["flags"] = 0
                    # special case
                    else:
                        one_shape["flags"] = 1

                    one_shape["fill_color"] = None
                    one_shape["label"] = classname
                    shapes.append(one_shape)

                json_jpg["shapes"] = shapes
                json_jpg["version"] = "3.16.7"
                json_jpg["flags"] = {}
                json_jpg["fillColor"] = [
                                            255,
                                            0,
                                            0,
                                            128
                                        ]
                json_jpg["lineColor"] = [
                                            0,
                                            255,
                                            0,
                                            128
                                        ]
                json_jpg["imageWidth"] = ww
                json_jpg["imageHeight"] = hh
                #jsonData = json.dumps(json_jpg, ensure_ascii=False, indent=1)
                jsonData = json.dumps(json_jpg, indent=1)
                jsonname = name.split(".")[0]
                jsonname = jsonname+".json"
                # save to the train/test split
                if tempNum == 1 or tempNum == 2 or tempNum == 3 or tempNum == 4:
                    cv2.imwrite(os.path.join(save_test,name),img)
                    fileObject = open(os.path.join(json_test,jsonname), 'w')
                    fileObject.write(jsonData)
                    fileObject.close()
                else:
                    cv2.imwrite(os.path.join(save_train,name),img)
                    fileObject = open(os.path.join(json_train,jsonname), 'w')
                    fileObject.write(jsonData)
                    fileObject.close()
            else:
                continue
            txtname = name.split(".")[0]
            train_txt.write(txtname)
            train_txt.write("\n")
            num += 1
            print(num,"/",count)
        print("converted:", num)
        train_txt.close()
    else:
        jpgpng = "png"
        print('\nimage suffix jpgpng is set manually here; change it if this causes bugs:', jpgpng)

    # create the output directories
    if not os.path.exists("%s/annotations/"%outputfile):
        os.makedirs("%s/annotations/"%outputfile)
    if not os.path.exists("%s/train2017/"%outputfile):
        os.makedirs("%s/train2017"%outputfile)
    if not os.path.exists("%s/val2017/"%outputfile):
        os.makedirs("%s/val2017"%outputfile)

    # list all json files in the train and test json directories
    json_list_train = glob.glob(json_train + "/*.json")
    json_list_test = glob.glob(json_test + "/*.json")

    print("train_n:", len(json_list_train), 'val_n:', len(json_list_test))

    # convert the training set to COCO json
    if len(json_list_train):
        l2c_train = Lableme2CoCo(classname_to_id, jpgpng)
        train_instance = l2c_train.to_coco(json_list_train, inputfile)
        l2c_train.save_coco_json(train_instance, '%s/annotations/instances_train2017.json'%outputfile)
        for file in json_list_train:
            name = file.split('/')[-1]
            name = os.path.join(inputfile,name)
            shutil.copy(name.replace("json", jpgpng),"%s/train2017/"%outputfile)

    if len(json_list_test):
        # convert the validation set to COCO json
        l2c_val = Lableme2CoCo(classname_to_id, jpgpng)
        val_instance = l2c_val.to_coco(json_list_test, inputfile)
        l2c_val.save_coco_json(val_instance, '%s/annotations/instances_val2017.json'%outputfile)
        for file in json_list_test:
            name = file.split('/')[-1]
            name = os.path.join(inputfile,name)
            shutil.copy(name.replace("json", jpgpng),"%s/val2017/"%outputfile)

if __name__ == "__main__":
   main(sys.argv[1:])
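An example invocation, matching the defaults baked into the script's else branch (paths are the author's own):

    python test.py -i ./seg_marker0123_photoneo \
                   -j seg_marker0123_photoneo_20200211_112204.json \
                   -c luosi -o ./coco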
