How to fix "ModuleNotFoundError: No module named 'podm.metrics'" and related import errors

podm.metrics

The following error is raised at runtime:

ModuleNotFoundError: No module named 'podm.metrics'

The error persists even after installing podm.
The solution is as follows:

Check the installation location

Check where podm is installed and open that directory (Ctrl+click the path shown in the output):

pip show podm
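
If you prefer to check from Python rather than pip, the same location can be printed directly (a minimal sketch, assuming podm is importable):

# Locate the installed podm package from Python instead of pip
import os
import podm

print(os.path.dirname(podm.__file__))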


Create a metrics.py file

Open the podm folder at that location. It does not contain a metrics.py file, so create one and paste the following code into it:

import sys
from collections import Counter, defaultdict
from enum import Enum
from typing import List, Dict, Any, Tuple

import numpy as np

from podm import box
from podm.coco import PCOCOObjectDetectionDataset, PCOCOBoundingBox, PCOCOSegments


class BoundingBox(box.Box):
    """
    image: the image id or file name.
    category: the category id or name.
    xtl: the X top-left coordinate of the bounding box.
    ytl: the Y top-left coordinate of the bounding box.
    xbr: the X bottom-right coordinate of the bounding box.
    ybr: the Y bottom-right coordinate of the bounding box.
    score: (optional) the confidence of the detected class.
    """
    def __init__(self):
        super(BoundingBox, self).__init__()
        self.image = None
        self.category = None
        self.score = None  # type: float or None

    @classmethod
    def of_bbox(cls, image, category, xtl: float, ytl: float, xbr: float, ybr: float, score: float = None) \
            -> 'BoundingBox':
        bbox = BoundingBox()
        bbox.xtl = xtl
        bbox.ytl = ytl
        bbox.xbr = xbr
        bbox.ybr = ybr
        bbox.image = image
        bbox.score = score
        bbox.category = category
        return bbox


def get_bounding_boxes(dataset: PCOCOObjectDetectionDataset, use_name: bool = True) -> List[BoundingBox]:
    bboxes = []
    for ann in dataset.annotations:
        if isinstance(ann, PCOCOBoundingBox):
            bb = BoundingBox.of_bbox(ann.image_id, ann.category_id, ann.xtl, ann.ytl, ann.xbr, ann.ybr, ann.score)
        elif isinstance(ann, PCOCOSegments):
            bb = BoundingBox.of_bbox(ann.image_id, ann.category_id,
                                     ann.bbox.xtl, ann.bbox.ytl, ann.bbox.xbr, ann.bbox.ybr, ann.score)
        else:
            raise TypeError
        if use_name:
            bb.image = dataset.get_image(id=ann.image_id).file_name
            bb.category = dataset.get_category(id=ann.category_id).name
        bboxes.append(bb)
    return bboxes


class MethodAveragePrecision(Enum):
    """
    Enum selecting how the average precision is interpolated:
    over all recall points or over 11 evenly spaced recall points.

        Developed by: Rafael Padilla
        Last modification: Apr 28 2018
    """
    AllPointsInterpolation = 1
    ElevenPointsInterpolation = 2


class MetricPerClass:
    def __init__(self):
        self.label = None
        self.precision = None
        self.recall = None
        self.ap = None
        self.interpolated_precision = None  # type: None or np.ndarray
        self.interpolated_recall = None  # type: None or np.ndarray
        self.num_groundtruth = None
        self.num_detection = None
        self.tp = None
        self.fp = None

    @staticmethod
    def mAP(results: Dict[Any, 'MetricPerClass']):
        return np.average([m.ap for m in results.values() if m.num_groundtruth > 0])


def get_pascal_voc_metrics(gold_standard: List[BoundingBox],
                           predictions: List[BoundingBox],
                           iou_threshold: float = 0.5,
                           method: MethodAveragePrecision = MethodAveragePrecision.AllPointsInterpolation
                           ) -> Dict[str, MetricPerClass]:
    """Get the metrics used by the VOC Pascal 2012 challenge.

    Args:
        gold_standard: ground truth bounding boxes;
        predictions: detected bounding boxes;
        iou_threshold: IOU threshold indicating which detections will be considered TP or FP (default value = 0.5);
        method: AP interpolation method; AllPointsInterpolation follows the implementation in the official
            PASCAL VOC toolkit, while ElevenPointsInterpolation applies the 11-point interpolation described in
            the paper "The PASCAL Visual Object Classes (VOC) Challenge";
    Returns:
        A dictionary containing metrics of each class.
    """
    ret = {}  # dictionary containing metrics (precision, recall, average precision) per class

    # Get all classes
    categories = sorted(set(b.category for b in gold_standard + predictions))

    # Precision x Recall is obtained individually by each class
    # Loop through by classes
    for category in categories:
        preds = [b for b in predictions if b.category == category]  # type: List[BoundingBox]
        golds = [b for b in gold_standard if b.category == category]  # type: List[BoundingBox]
        npos = len(golds)

        # sort detections by decreasing confidence
        preds = sorted(preds, key=lambda b: b.score, reverse=True)
        tps = np.zeros(len(preds))
        fps = np.zeros(len(preds))

        # create dictionary with amount of gts for each image
        counter = Counter([cc.image for cc in golds])
        for key, val in counter.items():
            counter[key] = np.zeros(val)

        # Pre-process: group ground truths of the same image
        image_name2gt = defaultdict(list)
        for b in golds:
            image_name2gt[b.image].append(b)

        # Loop through detections
        for i in range(len(preds)):
            # Find ground truth image
            gt = image_name2gt[preds[i].image]
            max_iou = sys.float_info.min
            mas_idx = -1
            for j in range(len(gt)):
                iou = box.intersection_over_union(preds[i], gt[j])
                if iou > max_iou:
                    max_iou = iou
                    mas_idx = j
            # Assign detection as true positive/don't care/false positive
            if max_iou >= iou_threshold:
                if counter[preds[i].image][mas_idx] == 0:
                    tps[i] = 1  # count as true positive
                    counter[preds[i].image][mas_idx] = 1  # flag as already 'seen'
                else:
                    # duplicate detection: this ground truth was already matched to another detection
                    fps[i] = 1  # count as false positive
            else:
                fps[i] = 1  # count as false positive
        # compute precision, recall and average precision
        cumulative_fps = np.cumsum(fps)
        cumulative_tps = np.cumsum(tps)
        recalls = np.divide(cumulative_tps, npos, out=np.full_like(cumulative_tps, np.nan), where=npos != 0)
        precisions = np.divide(cumulative_tps, (cumulative_fps + cumulative_tps))
        # Depending on the method, call the right implementation
        if method == MethodAveragePrecision.AllPointsInterpolation:
            ap, mrec, mpre, _ = calculate_all_points_average_precision(recalls, precisions)
        else:
            ap, mrec, mpre = calculate_11_points_average_precision(recalls, precisions)
        # add class result in the dictionary to be returned
        r = MetricPerClass()
        r.label = category
        r.precision = precisions
        r.recall = recalls
        r.ap = ap
        r.interpolated_recall = np.array(mrec)
        r.interpolated_precision = np.array(mpre)
        r.tp = np.sum(tps)
        r.fp = np.sum(fps)
        r.num_groundtruth = len(golds)
        r.num_detection = len(preds)
        ret[category] = r
    return ret


def calculate_all_points_average_precision(recall: List[float], precision: List[float]) \
        -> Tuple[float, List[float], List[float], List[int]]:
    """
    All-point interpolated average precision

    Returns:
        average precision
        interpolated recall
        interpolated precision
        interpolated points
    """
    mrec = [0.0] + [e for e in recall] + [1.0]
    mpre = [0.0] + [e for e in precision] + [0]
    for i in range(len(mpre) - 1, 0, -1):
        mpre[i - 1] = max(mpre[i - 1], mpre[i])
    ii = []
    for i in range(len(mrec) - 1):
        if mrec[i + 1] != mrec[i]:
            ii.append(i + 1)
    ap = 0
    for i in ii:
        ap = ap + np.sum((mrec[i] - mrec[i - 1]) * mpre[i])
    return ap, mrec[0:len(mpre) - 1], mpre[0:len(mpre) - 1], ii


def calculate_11_points_average_precision(recall: List[float], precision: List[float]) -> Tuple[float, List[float], List[float]]:
    """
    11-point interpolated average precision. This is done by segmenting the recalls evenly into 11 parts:
        {0,0.1,0.2,...,0.9,1}.

    Args:
        recall: recall list
        precision: precision list

    Returns:
        average precision, interpolated recall, interpolated precision

    """
    mrec = [e for e in recall]
    mpre = [e for e in precision]
    recall_values = np.linspace(0, 1, 11)
    recall_values = list(recall_values[::-1])
    rho_interp = []
    recall_valid = []
    # For each recallValues (0, 0.1, 0.2, ... , 1)
    for r in recall_values:
        # Obtain all recall values higher or equal than r
        arg_greater_recalls = np.argwhere(mrec[:] >= r)
        pmax = 0
        # If there are recalls above r
        if arg_greater_recalls.size != 0:
            pmax = max(mpre[arg_greater_recalls.min():])
        recall_valid.append(r)
        rho_interp.append(pmax)
    # By definition AP = sum(max(precision whose recall is above r))/11
    ap = sum(rho_interp) / 11
    # Generating values for the plot
    rvals = [recall_valid[0]] + [e for e in recall_valid] + [0]
    pvals = [0] + [e for e in rho_interp] + [0]
    # rhoInterp = rhoInterp[::-1]
    cc = []
    for i in range(len(rvals)):
        p = (rvals[i], pvals[i - 1])
        if p not in cc:
            cc.append(p)
        p = (rvals[i], pvals[i])
        if p not in cc:
            cc.append(p)
    recall_values = [i[0] for i in reversed(cc)]
    precision_values = [i[1] for i in reversed(cc)]
    return ap, recall_values, precision_values
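
Once box.py and coco.py from the following steps are also in place, metrics.py can be sanity-checked with a pair of hand-made boxes (a minimal sketch; the image name, label, and coordinates are made up):

from podm.metrics import BoundingBox, MetricPerClass, get_pascal_voc_metrics

# one ground-truth box and one detection of the same class on the same image
gold = [BoundingBox.of_bbox('img1.jpg', 'cat', 10, 10, 50, 50)]
pred = [BoundingBox.of_bbox('img1.jpg', 'cat', 12, 12, 48, 48, score=0.9)]

results = get_pascal_voc_metrics(gold, pred, iou_threshold=0.5)
print(results['cat'].ap)            # average precision of the 'cat' class
print(MetricPerClass.mAP(results))  # mean AP over all classes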

podm.box

Error:

metrics.py also imports podm.box, which is likewise missing and raises a similar import error.

Create a box.py file in the same podm folder and add the following content (the podm.coco module is created in a later step):

from enum import Enum
from typing import Tuple


class Box:
    """
                0,0 ------> x (width)
         |
         |  (Left,Top)
         |      *_________
         |      |         |
                |         |
         y      |_________|
      (height)            *
                    (Right,Bottom)

    xtl: the X top-left coordinate of the bounding box.
    ytl: the Y top-left coordinate of the bounding box.
    xbr: the X bottom-right coordinate of the bounding box.
    ybr: the Y bottom-right coordinate of the bounding box.
    """
    def __init__(self):
        self.xtl = None  # type: float or None
        self.ytl = None  # type: float or None
        self.xbr = None  # type: float or None
        self.ybr = None  # type: float or None

    @classmethod
    def of_box(cls, xtl: float, ytl: float, xbr: float, ybr: float) -> 'Box':
        """
        :param xtl: the X top-left coordinate of the bounding box.
        :param ytl: the Y top-left coordinate of the bounding box.
        :param xbr: the X bottom-right coordinate of the bounding box.
        :param ybr: the Y bottom-right coordinate of the bounding box.
        """
        box = Box()
        box.xtl = xtl
        box.ytl = ytl
        box.xbr = xbr
        box.ybr = ybr
        box.verify()
        return box

    def set_box(self, box: 'Box'):
        self.xtl = box.xtl
        self.ytl = box.ytl
        self.xbr = box.xbr
        self.ybr = box.ybr

    def verify(self):
        assert self.xtl <= self.xbr, f'xtl <= xbr: xtl:{self.xtl}, xbr:{self.xbr}'
        assert self.ytl <= self.ybr, f'ytl <= ybr: ytl:{self.ytl}, ybr:{self.ybr}'

    @property
    def segment(self):
        return [self.xtl, self.ytl, self.xtl, self.ybr, self.xbr, self.ybr, self.xbr, self.ytl]

    @property
    def width(self) -> float:
        return self.xbr - self.xtl

    @property
    def height(self) -> float:
        return self.ybr - self.ytl

    @property
    def area(self) -> float:
        return (self.xbr - self.xtl) * (self.ybr - self.ytl)

    @property
    def center(self) -> Tuple[float, float]:
        return (self.xbr + self.xtl) / 2, (self.ybr + self.ytl) / 2

    def __contains__(self, item):
        if not type(item) == list and not type(item) == tuple:
            raise TypeError('Has to be a list or a tuple: %s' % type(item))
        if len(item) == 2:
            return self.xtl <= item[0] < self.xbr and self.ytl <= item[1] < self.ybr
        else:
            raise ValueError('Only support a point')

    def __str__(self):
        return 'Box[xtl={},ytl={},xbr={},ybr={}]'.format(self.xtl, self.ytl, self.xbr, self.ybr)

    def __eq__(self, other):
        if not isinstance(other, Box):
            return False
        return self.xtl == other.xtl and self.ytl == other.ytl and self.xbr == other.xbr and self.ybr == other.ybr


def intersection_over_union(box1: 'Box', box2: 'Box') -> float:
    """
    Intersection Over Union (IOU) is measure based on Jaccard Index that evaluates the overlap between
    two bounding boxes.
    """
    # if boxes dont intersect
    if not is_intersecting(box1, box2):
        return 0
    intersection_area = intersection(box1, box2).area
    union = union_areas(box1, box2, intersection_area=intersection_area)
    # intersection over union
    iou = intersection_area / union
    assert iou >= 0, '{} = {} / {}, box1={}, box2={}'.format(iou, intersection_area, union, box1, box2)
    return iou


def is_intersecting(box1: 'Box', box2: 'Box') -> bool:
    if box1.xtl > box2.xbr:
        return False  # boxA is right of boxB
    if box2.xtl > box1.xbr:
        return False  # boxA is left of boxB
    if box1.ybr < box2.ytl:
        return False  # boxA is above boxB
    if box1.ytl > box2.ybr:
        return False  # boxA is below boxB
    return True


def union_areas(box1: 'Box', box2: 'Box', intersection_area: float = None) -> float:
    if intersection_area is None:
        intersection_area = intersection(box1, box2).area
    return box1.area + box2.area - intersection_area


def union(box1: 'Box', box2: 'Box'):
    xtl = min(box1.xtl, box2.xtl)
    ytl = min(box1.ytl, box2.ytl)
    xbr = max(box1.xbr, box2.xbr)
    ybr = max(box1.ybr, box2.ybr)
    return Box.of_box(xtl, ytl, xbr, ybr)


def intersection(box1: 'Box', box2: 'Box'):
    xtl = max(box1.xtl, box2.xtl)
    ytl = max(box1.ytl, box2.ytl)
    xbr = min(box1.xbr, box2.xbr)
    ybr = min(box1.ybr, box2.ybr)
    return Box.of_box(xtl, ytl, xbr, ybr)


class BBFormat(Enum):
    """
    Class representing the format of a bounding box.
    It can be (X,Y,width,height) => XYWH
    or (X1,Y1,X2,Y2) => XYX2Y2

        Developed by: Rafael Padilla
        Last modification: May 24 2018
    """
    XYWH = 1
    X1Y1X2Y2 = 2


# class BoundingBox(Box):
#     def __init__(self):
#         """Constructor.
#         Args:
#             image: image.
#             category: category.
#             xtl: the X top-left coordinate of the bounding box.
#             ytl: the Y top-left coordinate of the bounding box.
#             xbr: the X bottom-right coordinate of the bounding box.
#             ybr: the Y bottom-right coordinate of the bounding box.
#             score: (optional) the confidence of the detected class.
#         """
#         super(BoundingBox, self).__init__()
#         self.image = None
#         self.category = None
#         self.score = None  # type: float or None
#
#     @classmethod
#     def of_bbox(cls, image, category, xtl: float, ytl: float, xbr: float, ybr: float, score: float = None) \
#             -> 'BoundingBox':
#         bbox = BoundingBox()
#         bbox.xtl = xtl
#         bbox.ytl = ytl
#         bbox.xbr = xbr
#         bbox.ybr = ybr
#         bbox.image = image
#         bbox.score = score
#         bbox.category = category
#         return bbox
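
A quick way to confirm that box.py works is to compute the IoU of two overlapping boxes (a minimal sketch with made-up coordinates):

from podm.box import Box, intersection_over_union

b1 = Box.of_box(0, 0, 10, 10)
b2 = Box.of_box(5, 5, 15, 15)
# overlap is a 5x5 square (area 25); union area is 100 + 100 - 25 = 175
print(intersection_over_union(b1, b2))  # ~0.1429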

podm.coco_decoder

Create a coco_decoder.py file and add the following code:

import copy
import json
from typing import Dict

from podm.coco import PCOCOLicense, PCOCOInfo, PCOCOImage, PCOCOCategory, PCOCOBoundingBox, PCOCOSegments, \
    PCOCOObjectDetectionDataset


def parse_infon(obj: Dict) -> PCOCOInfo:
    info = PCOCOInfo()
    info.contributor = obj['contributor']
    info.description = obj['description']
    info.url = obj['url']
    info.date_created = obj['date_created']
    info.version = obj['version']
    info.year = obj['year']
    return info


def parse_license(obj: Dict) -> PCOCOLicense:
    lic = PCOCOLicense()
    lic.id = obj['id']
    lic.name = obj['name']
    lic.url = obj['url']
    return lic


def parse_image(obj: Dict) -> PCOCOImage:
    img = PCOCOImage()
    img.id = obj['id']
    img.height = obj['height']
    img.width = obj['width']
    img.file_name = obj['file_name']
    img.flickr_url = obj['flickr_url']
    img.coco_url = obj['coco_url']
    img.date_captured = obj['date_captured']
    img.license = obj['license']
    return img


def parse_bounding_box(obj: Dict) -> PCOCOBoundingBox:
    ann = PCOCOBoundingBox()
    ann.id = obj['id']
    ann.category_id = obj['category_id']
    ann.image_id = obj['image_id']
    ann.xtl = obj['bbox'][0]
    ann.ytl = obj['bbox'][1]
    ann.xbr = ann.xtl + obj['bbox'][2]
    ann.ybr = ann.ytl + obj['bbox'][3]
    if 'contributor' in obj:
        ann.contributor = obj['contributor']
    if 'score' in obj:
        ann.score = obj['score']
    if 'attributes' in obj:
        ann.attributes = obj['attributes']
    return ann


def parse_segments(obj: Dict) -> PCOCOSegments:
    ann = PCOCOSegments()
    ann.id = obj['id']
    ann.category_id = obj['category_id']
    ann.image_id = obj['image_id']
    ann.iscrowd = obj['iscrowd']
    ann.segmentation = obj['segmentation']
    if 'score' in obj:
        ann.score = obj['score']
    if 'contributor' in obj:
        ann.contributor = obj['contributor']
    if 'attributes' in obj:
        ann.attributes = obj['attributes']
    return ann


def parse_category(obj: Dict) -> PCOCOCategory:
    cat = PCOCOCategory()
    cat.id = obj['id']
    cat.name = obj['name']
    cat.supercategory = obj['supercategory']
    return cat


def parse_object_detection_dataset(coco_obj: Dict) -> PCOCOObjectDetectionDataset:
    dataset = PCOCOObjectDetectionDataset()
    dataset.info = parse_infon(coco_obj['info'])

    for lic_obj in coco_obj['licenses']:
        lic = parse_license(lic_obj)
        dataset.licenses.append(lic)

    for img_obj in coco_obj['images']:
        img = parse_image(img_obj)
        dataset.images.append(img)

    for ann_obj in coco_obj['annotations']:
        if 'segmentation' in ann_obj and len(ann_obj['segmentation']) > 0:
            ann = parse_segments(ann_obj)
        else:
            ann = parse_bounding_box(ann_obj)
        dataset.add_annotation(ann)

    for cat_obj in coco_obj['categories']:
        cat = parse_category(cat_obj)
        dataset.categories.append(cat)

    return dataset


def load_true_object_detection_dataset(fp, **kwargs) -> PCOCOObjectDetectionDataset:
    coco_obj = json.load(fp, **kwargs)
    return parse_object_detection_dataset(coco_obj)


def load_pred_object_detection_dataset(fp, dataset: PCOCOObjectDetectionDataset, **kwargs) \
        -> PCOCOObjectDetectionDataset:
    new_dataset = PCOCOObjectDetectionDataset()
    new_dataset.info = copy.deepcopy(dataset.info)
    new_dataset.licenses = copy.deepcopy(dataset.licenses)
    new_dataset.images = copy.deepcopy(dataset.images)
    new_dataset.categories = copy.deepcopy(dataset.categories)
    # check annotation
    coco_obj = json.load(fp, **kwargs)
    annotations = []
    for obj in coco_obj:
        ann = parse_bounding_box(obj)
        if new_dataset.get_image(id=ann.image_id) is None:
            print('%s: Cannot find image' % ann.image_id)
        if new_dataset.get_category(id=ann.category_id) is None:
            print('%s: Cannot find category' % ann.category_id)
        annotations.append(ann)
    new_dataset.annotations = annotations
    return new_dataset
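
A sketch of how these loaders are typically called (groundtruths.json and predictions.json are placeholder names; the predictions file is expected to be a JSON list of annotation objects):

from podm import coco_decoder

with open('groundtruths.json') as fp:
    gold_dataset = coco_decoder.load_true_object_detection_dataset(fp)

with open('predictions.json') as fp:
    # the gold dataset is passed in so images and categories can be cross-checked
    pred_dataset = coco_decoder.load_pred_object_detection_dataset(fp, gold_dataset)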

podm.coco_encoder

Create a coco_encoder.py file and add the following code:


import json
from typing import Union, TextIO, Dict, List

from podm.coco import PCOCOImage, PCOCOLicense, PCOCOInfo, \
    PCOCOCategory, PCOCOBoundingBox, \
    PCOCOSegments, PCOCOObjectDetectionDataset

PCOCO_OBJ = Union[
    PCOCOImage, PCOCOLicense, PCOCOInfo,
    PCOCOCategory,
    PCOCOObjectDetectionDataset,
    List[PCOCOBoundingBox],
]


class PCOCOJSONEncoder(json.JSONEncoder):
    """
    Extensible JSON encoder for PCOCO data structures.
    """

    def default(self, o):
        # print('xxxxxxxxxxxxxxxxxxxx')
        # print(type(o))
        # print(isinstance(o, PCOCOObjectDetectionDataset))
        # print(repr(o.__class__), repr(PCOCOObjectDetectionResult))
        if isinstance(o, PCOCOImage):
            return {
                "width": o.width,
                "height": o.height,
                "flickr_url": o.flickr_url,
                "coco_url": o.coco_url,
                "file_name": o.file_name,
                "date_captured": o.date_captured,
                "license": o.license,
                "id": o.id,
            }
        if isinstance(o, PCOCOLicense):
            return {
                "id": o.id,
                "name": o.name,
                "url": o.url,
            }
        if isinstance(o, PCOCOInfo):
            return {
                "year": o.year,
                "version": o.version,
                "description": o.description,
                "contributor": o.contributor,
                "url": o.url,
                "date_created": o.date_created,
            }
        if isinstance(o, PCOCOCategory):
            return {
                "id": o.id,
                "name": o.name,
                "supercategory": o.supercategory,
            }
        if isinstance(o, PCOCOBoundingBox):
            return {
                "id": o.id,
                "image_id": o.image_id,
                "category_id": o.category_id,
                "bbox": [o.xtl, o.ytl, o.width, o.height],
                "score": o.score,
                "contributor": o.contributor,
                "attributes": json.dumps(o.attributes)
            }
        if isinstance(o, PCOCOSegments):
            bb = o.bbox
            return {
                "id": o.id,
                "image_id": o.image_id,
                "category_id": o.category_id,
                "segmentation": o.segmentation,
                "bbox": [bb.xtl, bb.ytl, bb.width, bb.height],
                "area": bb.area,
                "iscrowd": o.iscrowd,
                "score": o.score,
                "contributor": o.contributor,
                "attributes": json.dumps(o.attributes)
            }
        if isinstance(o, PCOCOObjectDetectionDataset):
            return {
                "info": self.default(o.info),
                'images': [self.default(img) for img in o.images],
                "licenses": [self.default(l) for l in o.licenses],
                'annotations': [self.default(ann) for ann in o.annotations],
                'categories': [self.default(cat) for cat in o.categories],
            }
        # Let the base class default method raise the TypeError
        return json.JSONEncoder.default(self, o)


def toJSON(o) -> Dict:
    """
    Convert a pcoco obj to a Python `dict`
    """
    return PCOCOJSONEncoder().default(o)


def dumps(obj: PCOCO_OBJ, **kwargs) -> str:
    """
    Serialize a PCOCO ``obj`` to a JSON formatted ``str``. kwargs are passed to json.
    """
    return json.dumps(obj, cls=PCOCOJSONEncoder, indent=2, **kwargs)


def dump(obj: PCOCO_OBJ, fp: TextIO, **kwargs):
    """
    Serialize ``obj`` as a JSON formatted stream to ``fp``
    (a ``.write()``-supporting file-like object). kwargs are passed to json.
    """
    return json.dump(obj, fp, cls=PCOCOJSONEncoder, indent=2, **kwargs)
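
As a small check that the encoder works, even an empty dataset can be serialized (a minimal sketch; output.json is a placeholder name):

from podm import coco_encoder
from podm.coco import PCOCOObjectDetectionDataset

dataset = PCOCOObjectDetectionDataset()   # empty dataset, just to exercise the encoder
print(coco_encoder.dumps(dataset)[:200])  # JSON string
with open('output.json', 'w') as fp:
    coco_encoder.dump(dataset, fp)        # JSON file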

podm.coco

Create a coco.py file (note that it imports the shapely package, so shapely must be installed) and add the following code:

import copy
from abc import ABC
from typing import List, Tuple, Set, Collection
from datetime import date, datetime
from shapely.geometry import Polygon, Point
from podm import box


class PCOCOInfo:
    def __init__(self):
        self.year = date.today().year  # type:int
        self.version = ''  # type: str
        self.description = ''  # type: str
        self.contributor = ''  # type: str
        self.url = ''  # type: str
        self.date_created = datetime.now().strftime('%m/%d/%Y')  # type:str


class PCOCOAnnotation(ABC):
    def __init__(self):
        self.id = None  # type:int or None
        self.image_id = None  # type:int or None
        self.score = None  # type:float or None
        self.contributor = ''  # type: str
        self.attributes = {}  # type: dict


class PCOCOImage:
    def __init__(self):
        self.id = None  # type:int or None
        self.width = 0  # type:int
        self.height = 0  # type:int
        self.file_name = ''  # type:str
        self.license = None  # type:int or None
        self.flickr_url = ''  # type:str
        self.coco_url = ''  # type:str
        self.date_captured = datetime.now().strftime('%m/%d/%Y')  # type:str


class PCOCOLicense:
    def __init__(self):
        self.id = None  # type:int or None
        self.name = ''  # type:str
        self.url = ''  # type:str


class PCOCOCategory:
    def __init__(self):
        self.id = None  # type:int or None
        self.name = ''  # type:str
        self.supercategory = ''  # type:str


class PCOCODataset(ABC):
    def __init__(self):
        self.info = PCOCOInfo()  # type: PCOCOInfo or None
        self.images = []  # type: List[PCOCOImage]
        self.licenses = []  # type: List[PCOCOLicense]

    def add_license(self, license: PCOCOLicense):
        for lic in self.licenses:
            if lic.id == license.id or lic.name == license.name:
                raise KeyError('%s: License exists' % lic.id)
        self.licenses.append(license)

    def add_image(self, image: PCOCOImage):
        for img in self.images:
            if img.id == image.id or img.file_name == image.file_name:
                raise KeyError('%s: Image exists' % img.id)
        self.images.append(image)

    def get_image(self, id: int = None, file_name: str = None, default=None) -> PCOCOImage:
        if id is None and file_name is None:
            raise KeyError('%s %s: Cannot set both to None' % (id, file_name))
        if id is not None and file_name is not None:
            raise KeyError('%s %s: Cannot set both' % (id, file_name))

        imgs = self.images
        if id is not None:
            imgs = [img for img in imgs if img.id == id]
            if len(imgs) == 0:
                return default
            elif len(imgs) == 1:
                return next(iter(imgs))
            else:
                raise KeyError('%s: more than one image with the same id' % id)

        if file_name is not None:
            imgs = [img for img in imgs if img.file_name == file_name]
            if len(imgs) == 0:
                return default
            elif len(imgs) == 1:
                return next(iter(imgs))
            else:
                raise KeyError('%s: more than one image with the same name' % file_name)

        raise Exception('Should not be here')

    def get_images(self, ids: Collection[int] = None) -> List[PCOCOImage]:
        """
        Load anns with the specified ids.
        :param ids: integer ids specifying img
        :return: imgs: loaded img objects
        """
        return [img for img in self.images if img.id in ids]


##############################################################################
# object detection
##############################################################################


class PCOCOBoundingBox(PCOCOAnnotation, box.Box):
    def __init__(self):
        super(PCOCOBoundingBox, self).__init__()
        self.category_id = None  # type:int or None


class PCOCOSegments(PCOCOAnnotation):
    def __init__(self):
        super(PCOCOSegments, self).__init__()
        self.category_id = None  # type:int or None
        self.segmentation = []  # type: List[List[float]]
        self.iscrowd = False  # type:bool

    def add_box(self, box: box.Box):
        self.add_segmentation(box.segment)

    def add_segmentation(self, segmentation: List[float]):
        self.segmentation.append(segmentation)

    def __contains__(self, item):
        if not type(item) == list and not type(item) == tuple:
            raise TypeError('Has to be a list or a tuple: %s' % type(item))
        if len(item) == 2:
            point = Point(item[0], item[1])
            for p in self.polygons:
                if p.contains(point):
                    return True
            return False
        else:
            raise ValueError('Only support a point')

    @property
    def polygons(self) -> List[Polygon]:
        return [Polygon([(seg[i], seg[i+1]) for i in range(0, len(seg), 2)]) for seg in self.segmentation]

    @property
    def bbox(self) -> 'box.Box' or None:
        if len(self.segmentation) == 0:
            return None
        else:
            b = self.box_polygon(self.segmentation[0])
            for polygon in self.segmentation[1:]:
                b = box.union(b, self.box_polygon(polygon))
            return b

    @classmethod
    def box_polygon(cls, polygon: List[float]) -> 'box.Box':
        xtl = min(polygon[i] for i in range(0, len(polygon), 2))
        ytl = min(polygon[i] for i in range(1, len(polygon), 2))
        xbr = max(polygon[i] for i in range(0, len(polygon), 2))
        ybr = max(polygon[i] for i in range(1, len(polygon), 2))
        return box.Box.of_box(xtl, ytl, xbr, ybr)


class PCOCOImageCaptioning(PCOCOAnnotation):
    def __init__(self):
        super(PCOCOImageCaptioning, self).__init__()
        self.caption = None  # type:str or None


class PCOCOObjectDetectionDataset(PCOCODataset):
    def __init__(self):
        super(PCOCOObjectDetectionDataset, self).__init__()
        self.annotations = []  # type: List[PCOCOBoundingBox or PCOCOSegments]
        self.categories = []  # type: List[PCOCOCategory]

    def add_annotation(self, annotation: 'PCOCOBoundingBox' or 'PCOCOSegments'):
        for ann in self.annotations:
            if ann.id == annotation.id:
                raise KeyError('%s: Annotation exists' % ann.id)
        self.annotations.append(annotation)

    def add_category(self, category: PCOCOCategory):
        for cat in self.categories:
            if cat.id == category.id or cat.name == category.name:
                raise KeyError('%s: Category exists' % cat.id)
        self.categories.append(category)

    def get_max_category_id(self):
        return max(cat.id for cat in self.categories)

    def get_category(self, id: int = None, name: str = None, default=None) -> PCOCOCategory:
        if id is None and name is None:
            raise KeyError('%s %s: Cannot set both to None' % (id, name))
        if id is not None and name is not None:
            raise KeyError('%s %s: Cannot set both' % (id, name))

        cats = self.categories
        if id is not None:
            cats = [cat for cat in cats if cat.id == id]
            if len(cats) == 0:
                return default
            elif len(cats) == 1:
                return next(iter(cats))
            else:
                raise KeyError('%s: more than one category with the same id' % id)

        if name is not None:
            cats = [cat for cat in cats if cat.name == name]
            if len(cats) == 0:
                return default
            elif len(cats) == 1:
                return next(iter(cats))
            else:
                raise KeyError('%s: more than one category with the same name' % name)

        raise Exception('Should not be here')

    def get_annotation(self, id: int, default=None) -> PCOCOAnnotation:
        anns = [ann for ann in self.annotations if ann.id == id]
        if len(anns) == 0:
            return default
        elif len(anns) == 1:
            return next(iter(anns))
        else:
            raise KeyError('%s: more than one annotation' % id)

    def get_new_dataset(self, annotations: Collection[PCOCOBoundingBox or PCOCOSegments]):
        new_dataset = PCOCOObjectDetectionDataset()
        new_dataset.info = copy.deepcopy(self.info)
        new_dataset.licenses = copy.deepcopy(self.licenses)
        new_dataset.images = copy.deepcopy(self.images)
        new_dataset.categories = copy.deepcopy(self.categories)
        for ann in annotations:
            new_dataset.add_annotation(ann)
        return new_dataset

    def get_category_ids(self, category_names: Collection[str] = None,
                         supercategory_names: Collection[str] = None) -> Collection[int]:
        """
        filtering parameters. default skips that filter.
        :param category_names: get cats for given cat names
        :param supercategory_names: get cats for given supercategory names
        :return: integer array of cat ids
        """
        itr = iter(self.categories)
        if category_names is not None:
            itr = filter(lambda x: x.name in category_names, itr)
        if supercategory_names is not None:
            itr = filter(lambda x: x.supercategory in supercategory_names, itr)
        return [cat.id for cat in itr]

    def get_annotation_ids(self, image_ids: Collection[int] = None,
                           category_ids: Collection[int] = None,
                           area_range: Tuple[float, float] = None) -> Collection[int]:
        """
        Get ann ids that satisfy given filter conditions. default skips that filter
        :param image_ids: get anns for given imgs
        :param category_ids: get anns for given cats
        :param area_range: get anns for given area range (e.g. [0 inf])
        :return: integer array of ann ids
        """
        # image_ids = convert_array_argument(image_ids)
        # category_ids = convert_array_argument(category_ids)
        # area_range = convert_array_argument(area_range)

        itr = iter(self.annotations)
        if image_ids is not None:
            itr = filter(lambda x: x.image_id in image_ids, itr)
        if category_ids is not None:
            itr = filter(lambda x: x.category_id in category_ids, itr)
        if area_range is not None:
            itr = filter(lambda x: area_range[0] <= x.area <= area_range[1], itr)
        return [ann.id for ann in itr]

    def get_image_ids(self, category_ids: Collection[int] = None) -> Collection[int]:
        """
        Get img ids that satisfy given filter conditions.
        :param category_ids: get imgs with all given cats; default returns all img ids
        :return: ids: integer array of img ids
        """
        ids = set(img.id for img in self.images)
        if category_ids is None:
            return list(ids)
        for cat_id in category_ids:
            # keep only images that have at least one annotation of this category
            ids &= set(ann.image_id for ann in self.annotations if ann.category_id == cat_id)
        return list(ids)

    def get_annotations(self, ids: Collection[int] = None) -> Collection[PCOCOAnnotation]:
        """
        Load anns with the specified ids.
        :param ids: integer ids specifying anns
        :return: anns: loaded ann objects
        """
        return [ann for ann in self.annotations if ann.id in ids]

    def get_categories(self, ids: Collection[int] = None) -> Collection[PCOCOCategory]:
        """
        Load cats with the specified ids.
        :param ids: integer ids specifying cats
        :return: cats: loaded cat objects
        """
        return [cat for cat in self.categories if cat.id in ids]
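
As a quick check that coco.py imports cleanly, a tiny dataset can be assembled by hand (a sketch with made-up ids and names):

from podm.coco import PCOCOObjectDetectionDataset, PCOCOImage, PCOCOCategory, PCOCOBoundingBox

dataset = PCOCOObjectDetectionDataset()

img = PCOCOImage()
img.id, img.file_name = 0, 'img1.jpg'
dataset.add_image(img)

cat = PCOCOCategory()
cat.id, cat.name = 0, 'cat'
dataset.add_category(cat)

ann = PCOCOBoundingBox()
ann.id, ann.image_id, ann.category_id = 0, 0, 0
ann.xtl, ann.ytl, ann.xbr, ann.ybr = 10, 10, 50, 50
dataset.add_annotation(ann)

print(dataset.get_category(name='cat').id)  # 0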

podm.coco2labelme

Create a coco2labelme.py file and add the following code:

from podm.coco import PCOCOObjectDetectionDataset, PCOCOBoundingBox, PCOCOSegments


def coco2labelme(cocodataset: PCOCOObjectDetectionDataset):
    objs = []
    for img in cocodataset.images:
        obj = {
            "version": "5.0.1",
            "flags": {},
            "imagePath": img.file_name,
            "imageData": None,
            "imageHeight": img.height,
            "imageWidth": img.width,
            "shapes": []
        }
        for annid in cocodataset.get_annotation_ids(image_ids=[img.id]):
            ann = cocodataset.get_annotation(annid)
            if isinstance(ann, PCOCOBoundingBox):
                shape = {
                    "label": ann.attributes['ID'],
                    "points": [[ann.xtl, ann.ytl], [ann.xbr, ann.ybr]],
                    "group_id": None,
                    "shape_type": "rectangle",
                    "flags": {}
                }
            elif isinstance(ann, PCOCOSegments):
                shape = {
                    "label": ann.attributes['ID'],
                    "points": [[ann.segmentation[0][i], ann.segmentation[0][i+1]]
                               for i in range(0, len(ann.segmentation[0]), 2)],
                    "group_id": None,
                    "shape_type": "polygon",
                    "flags": {}
                }
            else:
                raise TypeError
            obj['shapes'].append(shape)

        objs.append(obj)

    return objs
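
A sketch of how the converter might be driven together with the decoder (file names are placeholders; note that coco2labelme reads ann.attributes['ID'] as the shape label, so the annotations must carry that attribute):

import json

from podm import coco_decoder
from podm.coco2labelme import coco2labelme

with open('groundtruths.json') as fp:
    dataset = coco_decoder.load_true_object_detection_dataset(fp)

for obj in coco2labelme(dataset):
    # write one labelme-style JSON file per image
    with open(obj['imagePath'] + '.json', 'w') as out:
        json.dump(obj, out, indent=2)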

podm.pascal2coco

Create a pascal2coco.py file (it additionally requires the docopt, tqdm, and pandas packages) and add the following code:

"""
Convert from a PASCAL VOC zip file to a COCO file.

Usage:
    pascal2coco gold --gold=<file> --output-gold=<file>
    pascal2coco pred --gold=<file> --pred=<file> --output-gold=<file> --output-pred=<file>

Options:
    --gold=<file>           PASCAL VOC groundtruths zip file
    --pred=<file>           PASCAL VOC predictions zip file
    --output-gold=<file>    Groundtruths JSON file
    --output-pred=<file>    Predictions JSON file
"""
import json
import zipfile
import io
import warnings

import docopt
import tqdm
import pandas as pd
from podm import coco_encoder
from podm.box import BBFormat, Box
from podm.coco import PCOCOObjectDetectionDataset, PCOCOImage, PCOCOCategory, PCOCOBoundingBox


def convert_pascal_to_df(src):
    rows = []
    with zipfile.ZipFile(src, 'r') as myzip:
        namelist = myzip.namelist()
        for name in tqdm.tqdm(namelist):
            if not name.endswith('.txt'):
                continue
            with myzip.open(name, 'r') as fp:
                name = name[name.find('/') + 1:]
                items_file = io.TextIOWrapper(fp)
                for line in items_file:
                    toks = line.strip().split(' ')
                    if len(toks) == 5:
                        row = {
                            'name': name,
                            'label': toks[0],
                            'xtl': int(toks[1]),
                            'ytl': int(toks[2]),
                            'xbr': int(toks[3]),
                            'ybr': int(toks[4])
                        }
                    elif len(toks) == 6:
                        row = {
                            'name': name,
                            'label': toks[0],
                            'score': float(toks[1]),
                            'xtl': int(toks[2]),
                            'ytl': int(toks[3]),
                            'xbr': int(toks[4]),
                            'ybr': int(toks[5])
                        }
                    else:
                        raise ValueError
                    rows.append(row)
    return pd.DataFrame(rows)


class PascalVoc2COCO:
    def __init__(self, format: BBFormat = BBFormat.X1Y1X2Y2):
        self.format = format

    def convert_gold(self, src) -> PCOCOObjectDetectionDataset:
        df = convert_pascal_to_df(src)

        dataset = PCOCOObjectDetectionDataset()
        # add image
        for i, name in enumerate(df['name'].unique()):
            img = PCOCOImage()
            img.id = i
            img.file_name = name
            dataset.add_image(img)
        # add category
        for i, label in enumerate(df['label'].unique()):
            cat = PCOCOCategory()
            cat.id = i
            cat.name = label
            dataset.add_category(cat)
        # add annotation
        for i, row in tqdm.tqdm(df.iterrows(), total=len(df)):
            box = Box.of_box(row['xtl'], row['ytl'], row['xbr'], row['ybr'])
            if self.format == BBFormat.XYWH:
                box.xbr += box.xtl
                box.ybr += box.ytl
            ann = PCOCOBoundingBox()
            ann.image_id = dataset.get_image(file_name=row['name']).id
            ann.id = i
            ann.category_id = dataset.get_category(name=row['label']).id
            ann.set_box(box)
            dataset.add_annotation(ann)
        return dataset

    def convert_gold_file(self, src, dest):
        dataset = self.convert_gold(src)
        with open(dest, 'w') as fp:
            coco_encoder.dump(dataset, fp)

    def convert_gold_pred(self, src_gold, src_pred):
        gold_dataset = self.convert_gold(src_gold)

        df = convert_pascal_to_df(src_pred)
        # check cat
        subrows = []
        for i, row in tqdm.tqdm(df.iterrows(), total=len(df)):
            if gold_dataset.get_category(name=row['label']) is None:
                warnings.warn('%s: Category does not exist' % row['label'])
                continue
            if gold_dataset.get_image(file_name=row['name']) is None:
                warnings.warn('%s: Image does not exist' % row['name'])
                continue
            subrows.append(row)
        if len(subrows) < len(df):
            warnings.warn('Remove %s rows' % (len(df) - len(subrows)))

        annotations = []
        for i, row in tqdm.tqdm(enumerate(subrows), total=len(subrows)):
            box = Box.of_box(row['xtl'], row['ytl'], row['xbr'], row['ybr'])
            if self.format == BBFormat.XYWH:
                box.xbr += box.xtl
                box.ybr += box.ytl
            ann = PCOCOBoundingBox()
            ann.image_id = gold_dataset.get_image(file_name=row['name']).id
            ann.id = i
            ann.category_id = gold_dataset.get_category(name=row['label']).id
            ann.score = row['score']
            ann.set_box(box)
            annotations.append(ann)

        pred_dataset = gold_dataset.get_new_dataset(annotations)
        return gold_dataset, pred_dataset

    def convert_gold_pred_file(self, src_gold, src_pred, dest_gold, dest_pred):
        gold_dataset, pred_dataset = self.convert_gold_pred(src_gold, src_pred)
        with open(dest_gold, 'w') as fp:
            coco_encoder.dump(gold_dataset, fp)

        with open(dest_pred, 'w') as fp:
            json.dump(pred_dataset.annotations, fp, cls=coco_encoder.PCOCOJSONEncoder, indent=2)


def main():
    argv = docopt.docopt(__doc__)
    converter = PascalVoc2COCO(BBFormat.X1Y1X2Y2)
    if argv['gold']:
        converter.convert_gold_file(argv['--gold'], argv['--output-gold'])
    if argv['pred']:
        converter.convert_gold_pred_file(argv['--gold'], argv['--pred'], argv['--output-gold'], argv['--output-pred'])


if __name__ == '__main__':
    main()
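
Besides the docopt command line described in the usage string above, the converter can also be called from Python (a sketch; the zip and JSON paths are placeholders):

from podm.box import BBFormat
from podm.pascal2coco import PascalVoc2COCO

converter = PascalVoc2COCO(BBFormat.X1Y1X2Y2)
converter.convert_gold_pred_file('groundtruths.zip', 'detections.zip',
                                 'groundtruths.json', 'predictions.json')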

Final folder contents

The podm folder should now contain, in addition to the files that shipped with the package, the newly created box.py, coco.py, coco_decoder.py, coco_encoder.py, coco2labelme.py, metrics.py, and pascal2coco.py.
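
With all of the files above in place, the original import error should be gone. A minimal end-to-end check (assuming COCO-format groundtruths.json and predictions.json, e.g. produced by pascal2coco):

from podm import coco_decoder
from podm.metrics import MetricPerClass, get_bounding_boxes, get_pascal_voc_metrics

with open('groundtruths.json') as fp:
    gold_dataset = coco_decoder.load_true_object_detection_dataset(fp)
with open('predictions.json') as fp:
    pred_dataset = coco_decoder.load_pred_object_detection_dataset(fp, gold_dataset)

results = get_pascal_voc_metrics(get_bounding_boxes(gold_dataset),
                                 get_bounding_boxes(pred_dataset),
                                 iou_threshold=0.5)
print(MetricPerClass.mAP(results))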
