上一章:深度篇——实例分割(二) 细说 mask rcnn 实例分割代码 训练自己数据
论文地址:《Mask R-CNN》
作者代码地址:Mask R-CNN code
我优化的代码地址:mask_rcnn_pro
本小节,细说 mask rcnn 实例分割代码 训练自己数据 相关网络,数据处理,工具等
六. 相关网络,数据处理,工具 代码
下面的代码都带有注释,稍加思考,理解起来基本不难。遇到不好理解的地方,可以直接 debug 单步查看,这样会更直观。
1.相关工具文件
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# ============================================
# @Time : 2020/05/13 22:57
# @Author : WanDaoYi
# @FileName : image_utils.py
# ============================================
import numpy as np
import skimage.color
import skimage.io
import skimage.transform
from distutils.version import LooseVersion
from config import cfg
class ImageUtils(object):
    """Image loading, normalisation and resizing helpers for the Mask R-CNN pipeline."""

    def __init__(self):
        # Mean pixel taken from the project config; subtracted from images in mode_input().
        self.mean_pixel = np.array(cfg.COMMON.MEAN_PIXEL)

    def parse_image_meta_graph(self, meta):
        """
        Split a batch of packed image-meta rows into named components.
        The column layout mirrors the one produced by compose_image_meta().
        :param meta: [batch, meta length]; the meta length depends on the number of classes.
        :return: dict with keys "image_id", "original_image_shape", "image_shape",
            "window", "scale" and "active_class_ids", each sliced from ``meta``.
        """
        return {
            "image_id": meta[:, 0],
            "original_image_shape": meta[:, 1:4],
            "image_shape": meta[:, 4:7],
            # (y1, x1, y2, x2) window of the image in pixels
            "window": meta[:, 7:11],
            "scale": meta[:, 11],
            "active_class_ids": meta[:, 12:],
        }

    def compose_image_meta(self, image_id, original_image_shape, image_shape,
                           window, scale, active_class_ids):
        """
        Pack the attributes of an image into one 1D array.
        :param image_id: An int ID of the image. Useful for debugging.
        :param original_image_shape: [H, W, C] before resizing or padding.
        :param image_shape: [H, W, C] after resizing and padding.
        :param window: (y1, x1, y2, x2) in pixels. The area of the image where the real
            image is (excluding the padding).
        :param scale: The scaling factor applied to the original image.
        :param active_class_ids: List of class_ids available in the dataset from which
            the image came. Useful if training on images from multiple datasets
            where not all classes are present in all datasets.
        :return: 1D numpy array of length 12 + len(active_class_ids).
        """
        fields = (
            [image_id]                      # size=1
            + list(original_image_shape)    # size=3
            + list(image_shape)             # size=3
            + list(window)                  # size=4 (y1, x1, y2, x2) in image coordinates
            + [scale]                       # size=1
            + list(active_class_ids)        # size=class_num
        )
        return np.array(fields)

    def load_image(self, image_path):
        """
        Load the specified image and return a [H, W, 3] numpy array.
        :param image_path: path of the image file.
        :return: the image with exactly three channels.
        """
        image = skimage.io.imread(image_path)
        # Grayscale input: convert to RGB for consistency.
        if image.ndim != 3:
            image = skimage.color.gray2rgb(image)
        # Drop the alpha channel, if any, for consistency.
        if image.shape[-1] == 4:
            image = image[..., :3]
        return image

    def mold_image(self, images, mean_pixel):
        """
        Convert an image (or array of images) to float32 and subtract the mean pixel.
        :param images: image array; the last axis must broadcast against ``mean_pixel``.
        :param mean_pixel: per-channel mean to subtract.
        :return: float array ``images - mean_pixel``.
        """
        return images.astype(np.float32) - np.array(mean_pixel)

    def mode_input(self, images_info_list):
        """
        Take a list of images and convert them to the format expected
        as an input to the neural network.
        :param images_info_list: List of image matrices [height, width, depth]. Images can
            have different sizes.
        :return: 3 numpy arrays:
            molded_images_list: [N, h, w, 3]. Images resized and normalized.
            image_metas_list: [N, length of meta data]. Details about each image.
            windows_list: [N, (y1, x1, y2, x2)]. The portion of the image that has the
                original image (padding excluded).
        """
        molded_images_list = []
        image_metas_list = []
        windows_list = []

        image_min_dim = cfg.COMMON.IMAGE_MIN_DIM
        image_max_dim = cfg.COMMON.IMAGE_MAX_DIM
        image_min_scale = cfg.COMMON.IMAGE_MIN_SCALE
        image_resize_mode = cfg.COMMON.IMAGE_RESIZE_MODE

        for image_info in images_info_list:
            # Resize first; padding and crop info are not needed at this stage.
            molded_image, window, scale, _padding, _crop = self.resize_image(
                image_info,
                min_dim=image_min_dim,
                min_scale=image_min_scale,
                max_dim=image_max_dim,
                resize_mode=image_resize_mode)
            molded_image = self.mold_image(molded_image, self.mean_pixel)
            # Build image_meta with image id 0 and an all-zero active class vector.
            image_meta = self.compose_image_meta(
                0, image_info.shape, molded_image.shape, window, scale,
                np.zeros([cfg.COMMON.CLASS_NUM], dtype=np.int32))

            molded_images_list.append(molded_image)
            image_metas_list.append(image_meta)
            windows_list.append(window)

        # Pack into arrays
        molded_images_list = np.stack(molded_images_list)
        image_metas_list = np.stack(image_metas_list)
        windows_list = np.stack(windows_list)
        return molded_images_list, image_metas_list, windows_list

    def resize(self, image, output_shape, order=1, resize_mode="constant", cval=0, clip=True,
               preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
        """
        A wrapper for scikit-image resize().
        scikit-image generates warnings on every call to resize() if it doesn't
        receive the right parameters. The right parameters depend on the version
        of skimage. This wrapper passes different parameters per version and
        provides a central place to control resizing defaults.
        :param image: image array to resize.
        :param output_shape: target shape, forwarded to skimage.
        :param order: interpolation order, forwarded to skimage.
        :param resize_mode: forwarded to skimage as ``mode``.
        :param cval: forwarded to skimage.
        :param clip: forwarded to skimage.
        :param preserve_range: forwarded to skimage.
        :param anti_aliasing: forwarded to skimage (only for skimage >= 0.14).
        :param anti_aliasing_sigma: forwarded to skimage (only for skimage >= 0.14).
        :return: the resized image as returned by skimage.transform.resize().
        """
        if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
            # New in 0.14: anti_aliasing. Default it to False for backward
            # compatibility with skimage 0.13.
            return skimage.transform.resize(image, output_shape,
                                            order=order, mode=resize_mode, cval=cval, clip=clip,
                                            preserve_range=preserve_range,
                                            anti_aliasing=anti_aliasing,
                                            anti_aliasing_sigma=anti_aliasing_sigma)
        return skimage.transform.resize(image, output_shape,
                                        order=order, mode=resize_mode, cval=cval, clip=clip,
                                        preserve_range=preserve_range)

    def resize_image(self, image, min_dim=None, max_dim=None, min_scale=None, resize_mode="square"):
        """
        Resize an image keeping the aspect ratio unchanged.
        :param image: image array; the first two axes are height and width.
        :param min_dim: if provided, resize the image such that its smaller dimension == min_dim
        :param max_dim: if provided, ensures that the image longest side doesn't
            exceed this value.
        :param min_scale: if provided, ensure that the image is scaled up by at least
            this percent even if min_dim doesn't require it.
        :param resize_mode: resizing mode.
            none: No resizing. Return the image unchanged.
            square: Resize and pad with zeros to get a square image
                of size [max_dim, max_dim].
            pad64: Pads width and height with zeros to make them multiples of 64.
                If min_dim or min_scale are provided, it scales the image up
                before padding. max_dim is ignored in this mode.
                The multiple of 64 is needed to ensure smooth scaling of feature
                maps up and down the 6 levels of the FPN pyramid (2**6=64).
            crop: Picks random crops from the image. First, scales the image based
                on min_dim and min_scale, then picks a random crop of
                size min_dim x min_dim. Can be used in training only.
                max_dim is not used in this mode.
        :return:
            image: the resized image, cast back to the input dtype
            window: (y1, x1, y2, x2). If max_dim is provided, padding might
                be inserted in the returned image. If so, this window is the
                coordinates of the image part of the full image (excluding
                the padding). The x2, y2 pixels are not included.
            scale: The scale factor used to resize the image
            padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
            crop: (y, x, height, width) of the random crop in "crop" mode, else None
        :raises Exception: if resize_mode is not one of the supported modes.
        """
        # Keep track of image dtype and return results in the same dtype
        image_dtype = image.dtype
        # Default window (y1, x1, y2, x2) and default scale == 1.
        h, w = image.shape[:2]
        window = (0, 0, h, w)
        scale = 1
        padding = [(0, 0), (0, 0), (0, 0)]
        crop = None

        if resize_mode == "none":
            return image, window, scale, padding, crop

        # Scale up but not down
        if min_dim:
            scale = max(1, min_dim / min(h, w))
        if min_scale and scale < min_scale:
            scale = min_scale

        # Does it exceed max dim?
        if max_dim and resize_mode == "square":
            image_max = max(h, w)
            if round(image_max * scale) > max_dim:
                scale = max_dim / image_max

        # Resize image (skimage default order=1 interpolation)
        if scale != 1:
            image = self.resize(image, (round(h * scale), round(w * scale)), preserve_range=True)

        # Need padding or cropping?
        if resize_mode == "square":
            # Centre the resized image inside a max_dim x max_dim canvas.
            h, w = image.shape[:2]
            top_pad = (max_dim - h) // 2
            bottom_pad = max_dim - h - top_pad
            left_pad = (max_dim - w) // 2
            right_pad = max_dim - w - left_pad
            padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
            image = np.pad(image, padding, mode='constant', constant_values=0)
            window = (top_pad, left_pad, h + top_pad, w + left_pad)
        elif resize_mode == "pad64":
            h, w = image.shape[:2]
            # Both sides must be divisible by 64
            assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64"
            # Height
            if h % 64 > 0:
                max_h = h - (h % 64) + 64
                top_pad = (max_h - h) // 2
                bottom_pad = max_h - h - top_pad
            else:
                top_pad = bottom_pad = 0
            # Width
            if w % 64 > 0:
                max_w = w - (w % 64) + 64
                left_pad = (max_w - w) // 2
                right_pad = max_w - w - left_pad
            else:
                left_pad = right_pad = 0
            padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
            image = np.pad(image, padding, mode='constant', constant_values=0)
            window = (top_pad, left_pad, h + top_pad, w + left_pad)
        elif resize_mode == "crop":
            # Pick a random crop. np.random.randint() excludes its upper bound, so
            # add 1 to make the last valid offset reachable and to accept images
            # whose side is exactly min_dim (offset 0 is then the only choice).
            h, w = image.shape[:2]
            y = np.random.randint(0, h - min_dim + 1)
            x = np.random.randint(0, w - min_dim + 1)
            crop = (y, x, min_dim, min_dim)
            image = image[y:y + min_dim, x:x + min_dim]
            window = (0, 0, min_dim, min_dim)
        else:
            raise Exception("Mode {} not supported".format(resize_mode))

        return image.astype(image_dtype), window, scale, padding, crop
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# ============================================
# @Time : 2020/05/13 12:06
# @Author : WanDaoYi
# @FileName : misc_utils.py
# ============================================
import math
import numpy as np
import tensorflow as tf
from utils.bbox_utils import BboxUtil
from config import cfg
class MiscUtils(object):
    """Graph-building helpers used when assembling the Mask R-CNN model."""

    def __init__(self):
        # Box helper providing overlaps_graph() and box_refinement_graph().
        self.bbox_util = BboxUtil()

    def compute_backbone_shapes(self, image_shape, backbone_strides):
        """
        Compute the height and width of each stage of the backbone network.
        :param image_shape: [h, w, c]
        :param backbone_strides: the stride of each stage of the FPN pyramid.
        :return: [N, (height, width)], where N is the number of stages.
        """
        stage_shapes = []
        for stride in backbone_strides:
            rows = int(math.ceil(image_shape[0] / stride))
            cols = int(math.ceil(image_shape[1] / stride))
            stage_shapes.append([rows, cols])
        return np.array(stage_shapes)

    def batch_slice(self, inputs, graph_fn, batch_size, names=None):
        """
        Run ``graph_fn`` on every slice of a batch and stack the results.
        This allows a graph written for a single instance to be applied to a
        whole batch of inputs.
        :param inputs: tensor or list of tensors. All must have the same first dimension length.
        :param graph_fn: a function that returns a TF tensor (or a tuple/list of them).
        :param batch_size: number of slices to divide the data into.
        :param names: if provided, names assigned to the resulting stacked tensors.
        :return: a single stacked tensor when graph_fn yields one output, otherwise
            a list with one stacked tensor per output.
        """
        tensors = inputs if isinstance(inputs, list) else [inputs]

        per_slice = []
        for idx in range(batch_size):
            slice_out = graph_fn(*[tensor[idx] for tensor in tensors])
            if not isinstance(slice_out, (tuple, list)):
                slice_out = [slice_out]
            per_slice.append(slice_out)

        # Transpose: from "one list of outputs per slice" to
        # "one tuple of slices per output".
        per_output = list(zip(*per_slice))
        if names is None:
            names = [None] * len(per_output)

        stacked = [tf.stack(group, axis=0, name=label)
                   for group, label in zip(per_output, names)]
        return stacked[0] if len(stacked) == 1 else stacked

    def trim_zeros_graph(self, boxes, name='trim_zeros'):
        """
        Remove all-zero rows from a zero-padded [N, 4] box matrix.
        :param boxes: [N, 4] matrix of boxes.
        :param name: name given to the boolean_mask op.
        :return: (boxes, non_zeros) where boxes holds only the kept rows and
            non_zeros is the [N] boolean mask identifying the rows that were kept.
        """
        row_abs_sum = tf.reduce_sum(tf.abs(boxes), axis=1)
        non_zeros = tf.cast(row_abs_sum, tf.bool)
        kept_boxes = tf.boolean_mask(boxes, non_zeros, name=name)
        return kept_boxes, non_zeros

    def detection_targets_graph(self, proposals, gt_class_ids, gt_boxes, gt_masks):
        """
        Generate detection targets for one image. Subsamples proposals and
        generates target class IDs, bounding box deltas, and masks for each.
        :param proposals: [POST_NMS_ROIS_TRAINING, (y1, x1, y2, x2)] in normalized coordinates.
            Might be zero padded if there are not enough proposals.
        :param gt_class_ids: [MAX_GT_INSTANCES] int class IDs
        :param gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates.
        :param gt_masks: [height, width, MAX_GT_INSTANCES] of boolean type.
        :return: Target ROIs and corresponding class IDs, bounding box shifts, and masks.
            rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates
            class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded.
            deltas: [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))]
            masks: [TRAIN_ROIS_PER_IMAGE, height, width]. Masks cropped to bbox
                boundaries and resized to neural network output size.
            Note: Returned arrays might be zero padded if not enough target ROIs.
        """
        # Guard: there must be at least one proposal row.
        roi_check = tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals], name="roi_assertion")
        with tf.control_dependencies([roi_check]):
            proposals = tf.identity(proposals)

        # Strip the zero padding from proposals and ground truth.
        proposals, _ = self.trim_zeros_graph(proposals, name="trim_proposals")
        gt_boxes, gt_keep = self.trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
        gt_class_ids = tf.boolean_mask(gt_class_ids, gt_keep, name="trim_gt_class_ids")
        gt_masks = tf.gather(gt_masks, tf.where(gt_keep)[:, 0], axis=2, name="trim_gt_masks")

        # COCO crowds: a crowd box surrounds several instances and carries a
        # negative class ID. Separate them so they are excluded from training.
        crowd_idx = tf.where(gt_class_ids < 0)[:, 0]
        instance_idx = tf.where(gt_class_ids > 0)[:, 0]
        crowd_boxes = tf.gather(gt_boxes, crowd_idx)
        gt_class_ids = tf.gather(gt_class_ids, instance_idx)
        gt_boxes = tf.gather(gt_boxes, instance_idx)
        gt_masks = tf.gather(gt_masks, instance_idx, axis=2)

        # Overlaps matrix [proposals, gt_boxes]
        overlaps = self.bbox_util.overlaps_graph(proposals, gt_boxes)

        # Overlaps with crowd boxes [proposals, crowd_boxes]
        crowd_overlaps = self.bbox_util.overlaps_graph(proposals, crowd_boxes)
        crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
        avoids_crowd = (crowd_iou_max < 0.001)

        # Positive ROIs have IoU >= 0.5 with some GT box; negative ROIs have
        # IoU < 0.5 with every GT box and do not touch a crowd box.
        best_iou = tf.reduce_max(overlaps, axis=1)
        is_positive = (best_iou >= 0.5)
        positive_idx = tf.where(is_positive)[:, 0]
        negative_idx = tf.where(tf.logical_and(best_iou < 0.5, avoids_crowd))[:, 0]

        # Subsample ROIs so positives make up at most ROI_POSITIVE_RATIO of the total.
        max_positive = int(cfg.TRAIN.ROIS_PER_IMAGE * cfg.TRAIN.ROI_POSITIVE_RATIO)
        positive_idx = tf.random_shuffle(positive_idx)[:max_positive]
        num_positive = tf.shape(positive_idx)[0]
        # Add enough negatives to maintain the positive:negative ratio.
        inverse_ratio = 1.0 / cfg.TRAIN.ROI_POSITIVE_RATIO
        num_negative = tf.cast(inverse_ratio * tf.cast(num_positive, tf.float32), tf.int32) - num_positive
        negative_idx = tf.random_shuffle(negative_idx)[:num_negative]

        # Gather selected ROIs
        positive_rois = tf.gather(proposals, positive_idx)
        negative_rois = tf.gather(proposals, negative_idx)

        # Assign each positive ROI to the GT box it overlaps most.
        positive_overlaps = tf.gather(overlaps, positive_idx)
        roi_gt_box_assignment = tf.cond(
            tf.greater(tf.shape(positive_overlaps)[1], 0),
            true_fn=lambda: tf.argmax(positive_overlaps, axis=1),
            false_fn=lambda: tf.cast(tf.constant([]), tf.int64)
        )
        roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
        roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)

        # Bbox refinement for positive ROIs, normalised by the configured std dev.
        deltas = self.bbox_util.box_refinement_graph(positive_rois, roi_gt_boxes)
        deltas /= np.array(cfg.COMMON.BBOX_STD_DEV)

        # Permute masks to [N, height, width, 1] and pick the mask of each ROI.
        transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1)
        roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment)

        # Compute mask targets
        crop_boxes = positive_rois
        if cfg.TRAIN.USE_MINI_MASK:
            # Transform ROI coordinates from normalized image space
            # to normalized mini-mask space.
            y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1)
            gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1)
            gt_h = gt_y2 - gt_y1
            gt_w = gt_x2 - gt_x1
            y1 = (y1 - gt_y1) / gt_h
            x1 = (x1 - gt_x1) / gt_w
            y2 = (y2 - gt_y1) / gt_h
            x2 = (x2 - gt_x1) / gt_w
            crop_boxes = tf.concat([y1, x1, y2, x2], 1)
        box_ids = tf.range(0, tf.shape(roi_masks)[0])
        masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32),
                                         crop_boxes, box_ids,
                                         cfg.TRAIN.MASK_SHAPE)
        # Remove the extra dimension from masks.
        masks = tf.squeeze(masks, axis=3)
        # Threshold mask pixels at 0.5 so GT masks are 0 or 1 for use with
        # binary cross entropy loss.
        masks = tf.round(masks)

        # Append negative ROIs, then zero-pad the per-ROI targets that
        # negative ROIs do not use (class ids, deltas, masks).
        rois = tf.concat([positive_rois, negative_rois], axis=0)
        negative_rows = tf.shape(negative_rois)[0]
        pad_rows = tf.maximum(cfg.TRAIN.ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
        target_pad = negative_rows + pad_rows
        rois = tf.pad(rois, [(0, pad_rows), (0, 0)])
        roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, target_pad)])
        deltas = tf.pad(deltas, [(0, target_pad), (0, 0)])
        masks = tf.pad(masks, [[0, target_pad], (0, 0), (0, 0)])
        return rois, roi_gt_class_ids, deltas, masks
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# ============================================
# @Time : 2020/05/01 00:22
# @Author : WanDaoYi
# @FileName : mask_util.py
# ============================================
import warnings
import numpy as np
import scipy.ndimage
from utils.image_utils import ImageUtils
from pycocotools import mask as coco_mask_utils
from config import cfg
class MaskUtil(object):
def __init__(self):
    """Read the COCO model URL from the config and create the image helper."""
    # URL of the pre-trained COCO weights, as configured in cfg.COMMON.
    self.coco_model_url = cfg.COMMON.COCO_MODEL_URL
    # Shared image helper instance.
    self.image_utils = ImageUtils()
# Compute the IoU overlaps between two sets of masks
def compute_overlaps_masks(self, masks1, masks2):
    """
    Compute the pairwise IoU between two sets of instance masks.
    :param masks1: [Height, Width, instances]
    :param masks2: [Height, Width, instances]
    :return: [instances1, instances2] matrix of IoU overlaps. Pairs whose union
        is empty (both masks have no foreground pixel) get an IoU of 0.
    """
    count1 = masks1.shape[-1]
    count2 = masks2.shape[-1]
    # If either set has no instances, return an empty result.
    if count1 == 0 or count2 == 0:
        return np.zeros((count1, count2))

    # Binarise at 0.5, flatten each mask to a column and compute the areas.
    flat1 = np.reshape(masks1 > .5, (-1, count1)).astype(np.float32)
    flat2 = np.reshape(masks2 > .5, (-1, count2)).astype(np.float32)
    area1 = np.sum(flat1, axis=0)
    area2 = np.sum(flat2, axis=0)

    # intersections and union
    intersections = np.dot(flat1.T, flat2)
    union = area1[:, None] + area2[None, :] - intersections

    # Divide only where the union is non-empty; a plain division would give
    # NaN (0 / 0) for pairs of empty masks.
    overlaps = np.zeros_like(intersections)
    np.divide(intersections, union, out=overlaps, where=union > 0)
    return overlaps
def annotation_2_mask(self, annotation, height, width):
    """
    Convert an annotation, which can be polygons, uncompressed RLE, or RLE, to a binary mask.
    :param annotation: annotation info; its 'segmentation' entry is decoded.
    :param height: image height
    :param width: image width
    :return: binary mask (numpy 2D array)
    """
    segment = annotation['segmentation']
    if isinstance(segment, list):
        # polygon -- a single object might consist of multiple parts;
        # merge all parts into one RLE-encoded mask
        rles = coco_mask_utils.frPyObjects(segment, height, width)
        rle = coco_mask_utils.merge(rles)
    elif isinstance(segment['counts'], list):
        # uncompressed RLE
        rle = coco_mask_utils.frPyObjects(segment, height, width)
    else:
        # Already a compressed RLE: the segmentation entry itself is the RLE.
        # (Indexing it with 'segmentation' again would raise KeyError.)
        rle = segment
    return coco_mask_utils.decode(rle)
def load_mask(self, data, image_id):
    """
    Load instance masks for the given image.
    Different datasets use different ways to store masks. This
    function converts the different mask formats to one format
    in the form of a bitmap [height, width, instances].
    :param data: the dataset object; its image_info_list and class_from_source_map are read.
    :param image_id: index of the image in data.image_info_list
    :return:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance. The instance count is 0 when the image
            has no usable annotation.
        class_ids: a 1D int32 array of class IDs of the instance masks.
    """
    image_info = data.image_info_list[image_id]
    height = image_info["height"]
    width = image_info["width"]

    instance_masks = []
    class_ids = []
    # Build mask of shape [height, width, instance_count] and list
    # of class IDs that correspond to each channel of the mask.
    for annotation in image_info["annotations"]:
        class_id = data.class_from_source_map["coco.{}".format(annotation['category_id'])]
        if not class_id:
            continue
        m = self.annotation_2_mask(annotation, height, width)
        # Some objects are so small that they're less than 1 pixel area
        # and end up rounded out. Skip those objects.
        if m.max() < 1:
            continue
        if annotation['iscrowd']:
            # Use negative class ID for crowds
            class_id *= -1
            # For crowd masks, the decoded mask is sometimes smaller than
            # the given dimensions. If so, replace it with a full-image mask.
            if m.shape[0] != height or m.shape[1] != width:
                m = np.ones([height, width], dtype=bool)
        instance_masks.append(m)
        class_ids.append(class_id)

    # np.stack() rejects an empty list, so return an explicit empty result.
    if not instance_masks:
        return np.zeros([height, width, 0], dtype=bool), np.zeros([0], dtype=np.int32)

    # Use the builtin bool: the np.bool alias was removed from NumPy.
    mask = np.stack(instance_masks, axis=2).astype(bool)
    class_ids = np.array(class_ids, dtype=np.int32)
    return mask, class_ids
def resize_mask(self, mask, scale, padding, crop=None):
    """
    Resize a mask using the given scale, then crop or pad it.
    Typically, the scale, padding and crop come from resize_image() so
    that the image and its mask are resized consistently.
    :param mask: mask array with three axes; the last axis is not scaled.
    :param scale: mask scaling factor applied to the first two axes
    :param padding: padding to add to the mask in the form
        [(top, bottom), (left, right), (0, 0)]; used only when crop is None
    :param crop: optional (y, x, height, width); when given, the scaled mask is
        cropped instead of padded
    :return: the resized mask
    """
    zoom_factors = [scale, scale, 1]
    # Suppress warning from scipy 0.13.0, the output shape of zoom() is
    # calculated with round() instead of int()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        scaled = scipy.ndimage.zoom(mask, zoom=zoom_factors, order=0)

    if crop is None:
        return np.pad(scaled, padding, mode='constant', constant_values=0)

    top, left, crop_h, crop_w = crop
    return scaled[top:top + crop_h, left:left + crop_w]
def minimize_mask(self, bbox, mask, mini_shape):
"""
Resize masks to a smaller version to reduce memory load.
Mini-masks can be resized back to image scale using expand_masks()
:param bbox:
:param mask:
:param mini_shape:
:return:
"""
# 避免 传参 过来 是 list,在 cfg.TRAIN.MINI_MASK_SHAPE 获得的是 list
mini_shape = tuple(mini_shape)
mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
for i in range(mask.shape[-1]):
# Pick slice and cast to bool in case load_mask() returned wrong dtype
m = mask[:, :, i].astype(bool)
y1, x1, y2, x2 = bbox[i][:4]
m = m[y1:y2, x1:x2]
if m.size == 0:
raise Exception("Invalid bounding box with area of zero&