Background
In layout-analysis development, different object-detection frameworks report their results in different ways. Some layout-analysis models only output the predicted boxes without reporting AP values, which makes it inconvenient to compare models on concrete test-set numbers.
To deal with this, I wrote a script that computes AP directly from the result files.
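Both scripts below expect COCO-style JSON for the ground truth and the predictions. Inferred from the fields the code reads (the values here are hypothetical placeholders), the inputs look roughly like this:

# Ground truth (val.json): standard COCO layout
val_data = {
    "images": [{"id": 1, "file_name": "page_001.jpg"}],
    "annotations": [
        # bbox is COCO format: [x, y, width, height]
        {"image_id": 1, "category_id": 3, "bbox": [100.0, 200.0, 50.0, 80.0]},
    ],
}
# Predictions (out2.json): same layout, plus a confidence score per box
out2_data = {
    "images": [{"id": 1}],
    "annotations": [
        {"image_id": 1, "category_id": 3, "bbox": [98.0, 205.0, 52.0, 78.0], "score": 0.91},
    ],
}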
Code
The full script for AP50 (IoU threshold 0.5) follows. It loads COCO-format ground truth and predictions, greedily matches predictions to ground-truth boxes per image and per category, collects one precision/recall point per image, and integrates the sorted precision-recall points into a per-category AP. Note that this is a simplified approximation: the reference COCO evaluation ranks all detections across the whole dataset by confidence, so the numbers may differ somewhat from pycocotools.
# coding=utf-8
import json

import numpy as np


def load_json(file_path):
    """Load JSON data from file."""
    with open(file_path, 'r') as f:
        return json.load(f)
def calculate_iou(box1, box2):
    """IoU between two COCO-format boxes: [x, y, w, h]."""
    x1, y1, w1, h1 = box1
    x2, y2, w2, h2 = box2
    # Intersection rectangle
    inter_x1 = max(x1, x2)
    inter_y1 = max(y1, y2)
    inter_x2 = min(x1 + w1, x2 + w2)
    inter_y2 = min(y1 + h1, y2 + h2)
    # No "+1" here: COCO boxes use continuous [x, y, w, h] coordinates,
    # not the inclusive integer pixel indices of the old VOC convention
    inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)
    # Union area
    area1 = w1 * h1
    area2 = w2 * h2
    union_area = area1 + area2 - inter_area
    # IoU
    return inter_area / union_area if union_area > 0 else 0
def match_predictions_to_gt(predictions, gts, iou_threshold=0.5):
    """Greedily match predictions to ground truth; each GT box matches at most once."""
    tp, fp = 0, 0
    matched_gts = set()
    for pred_box in predictions:
        best_iou = 0
        best_gt_idx = -1
        for idx, gt_box in enumerate(gts):
            if idx in matched_gts:
                continue  # only still-unmatched ground-truth boxes are candidates
            iou = calculate_iou(pred_box, gt_box)
            if iou > best_iou:
                best_iou = iou
                best_gt_idx = idx
        if best_iou >= iou_threshold:
            tp += 1
            matched_gts.add(best_gt_idx)
        else:
            fp += 1
    fn = len(gts) - len(matched_gts)
    return tp, fp, fn
def calculate_precision_recall(tp, fp, fn):
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    return precision, recall
def calculate_ap(precisions, recalls):
    """Calculate Average Precision (AP) from precision and recall lists."""
    precisions = np.array(precisions)
    recalls = np.array(recalls)
    # Sort by recall
    sorted_indices = np.argsort(recalls)
    recalls_sorted = recalls[sorted_indices]
    precisions_sorted = precisions[sorted_indices]
    # Compute AP as area under the precision-recall curve (rectangle rule)
    ap = 0.0
    for i in range(1, len(recalls_sorted)):
        ap += (recalls_sorted[i] - recalls_sorted[i - 1]) * precisions_sorted[i]
    return ap
# Load the JSON data
val_data = load_json('/mnt/md0/unilm-master/layoutlmv3/examples/object_detection/path/to/data/CDLA_DATASET/val.json')
out2_data = load_json('/mnt/md0/ragflow-main/out2.json')

# Extract ground truth boxes from val_data
gt_boxes = {img['id']: [] for img in val_data['images']}
for annotation in val_data['annotations']:
    img_id = annotation['image_id']
    bbox = annotation['bbox']
    cat_id = annotation['category_id']
    gt_boxes[img_id].append((bbox, cat_id))

# Extract predicted boxes from out2_data
pred_boxes = {img['id']: [] for img in out2_data['images']}
for annotation in out2_data['annotations']:
    img_id = annotation['image_id']
    bbox = annotation['bbox']
    cat_id = annotation['category_id']
    score = annotation['score']
    pred_boxes[img_id].append((bbox, cat_id, score))
# Category ids present in the ground truth
category_ids_val = {ann['category_id'] for ann in val_data['annotations']}
# Collect per-image precision and recall for each category
precisions = {cat_id: [] for cat_id in category_ids_val}
recalls = {cat_id: [] for cat_id in category_ids_val}
for img_id in gt_boxes:
    gt = gt_boxes.get(img_id, [])
    pred = pred_boxes.get(img_id, [])
    # Sort predictions by confidence
    pred = sorted(pred, key=lambda x: x[2], reverse=True)
    # Group ground truth boxes by category
    gt_by_category = {cat_id: [] for cat_id in category_ids_val}
    for bbox, cat_id in gt:
        gt_by_category[cat_id].append(bbox)
    # Match predictions to ground truth for each category
    for cat_id in category_ids_val:
        cat_preds = [box for box, pred_cat_id, _ in pred if pred_cat_id == cat_id]
        cat_gt = gt_by_category[cat_id]
        tp, fp, fn = match_predictions_to_gt(cat_preds, cat_gt)
        precision, recall = calculate_precision_recall(tp, fp, fn)
        precisions[cat_id].append(precision)
        recalls[cat_id].append(recall)
# Calculate AP for each category
aps = {}
for cat_id in category_ids_val:
    if precisions[cat_id] and recalls[cat_id]:
        aps[cat_id] = calculate_ap(precisions[cat_id], recalls[cat_id])
    else:
        aps[cat_id] = 0.0

# Calculate mAP (mean Average Precision)
mAP = np.mean(list(aps.values()))

# Print results
print(f"Mean Average Precision (mAP): {mAP}")
print("Average Precision (AP) per category:")
for cat_id, ap in aps.items():
    print(f"Category ID {cat_id}: {ap}")
The variant below sweeps IoU thresholds from 0.50 to 0.95 in steps of 0.05 (the COCO mAP convention) and averages the per-threshold results:
# coding=utf-8
import json

import numpy as np


def load_json(file_path):
    """Load JSON data from file."""
    with open(file_path, 'r') as f:
        return json.load(f)
def calculate_iou(box1, box2):
    """IoU between two COCO-format boxes: [x, y, w, h]."""
    x1, y1, w1, h1 = box1
    x2, y2, w2, h2 = box2
    # Intersection rectangle
    inter_x1 = max(x1, x2)
    inter_y1 = max(y1, y2)
    inter_x2 = min(x1 + w1, x2 + w2)
    inter_y2 = min(y1 + h1, y2 + h2)
    # No "+1": COCO boxes use continuous [x, y, w, h] coordinates
    inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)
    # Union area
    area1 = w1 * h1
    area2 = w2 * h2
    union_area = area1 + area2 - inter_area
    # IoU
    return inter_area / union_area if union_area > 0 else 0
def match_predictions_to_gt(predictions, gts, iou_threshold):
    """Greedily match predictions to ground truth; each GT box matches at most once."""
    tp, fp = 0, 0
    matched_gts = set()
    for pred_box in predictions:
        best_iou = 0
        best_gt_idx = -1
        for idx, gt_box in enumerate(gts):
            if idx in matched_gts:
                continue  # only still-unmatched ground-truth boxes are candidates
            iou = calculate_iou(pred_box, gt_box)
            if iou > best_iou:
                best_iou = iou
                best_gt_idx = idx
        if best_iou >= iou_threshold:
            tp += 1
            matched_gts.add(best_gt_idx)
        else:
            fp += 1
    fn = len(gts) - len(matched_gts)
    return tp, fp, fn
def calculate_precision_recall(tp, fp, fn):
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    return precision, recall


def calculate_ap(precisions, recalls):
    """Calculate Average Precision (AP) from precision and recall lists."""
    precisions = np.array(precisions)
    recalls = np.array(recalls)
    # Sort by recall
    sorted_indices = np.argsort(recalls)
    recalls_sorted = recalls[sorted_indices]
    precisions_sorted = precisions[sorted_indices]
    # Compute AP as area under the precision-recall curve (rectangle rule)
    ap = 0.0
    for i in range(1, len(recalls_sorted)):
        ap += (recalls_sorted[i] - recalls_sorted[i - 1]) * precisions_sorted[i]
    return ap
# Load the JSON data
val_data = load_json('/mnt/md0/zhangfanhao/unilm-master/layoutlmv3/examples/object_detection/path/to/data/CDLA_DATASET/val.json')
out2_data = load_json('/mnt/md0/zhangfanhao/ragflow-main/out2.json')

# Extract ground truth boxes from val_data
gt_boxes = {img['id']: [] for img in val_data['images']}
for annotation in val_data['annotations']:
    img_id = annotation['image_id']
    bbox = annotation['bbox']
    cat_id = annotation['category_id']
    gt_boxes[img_id].append((bbox, cat_id))

# Extract predicted boxes from out2_data
pred_boxes = {img['id']: [] for img in out2_data['images']}
for annotation in out2_data['annotations']:
    img_id = annotation['image_id']
    bbox = annotation['bbox']
    cat_id = annotation['category_id']
    score = annotation['score']
    pred_boxes[img_id].append((bbox, cat_id, score))
# Category ids present in the ground truth
category_ids_val = {ann['category_id'] for ann in val_data['annotations']}

apres = {cat_id: [] for cat_id in category_ids_val}
apmap = []
# np.linspace avoids the floating-point drift of `threshold += 0.05`, which
# would make a `while threshold <= 0.95` loop skip the final 0.95 step
for threshold in np.linspace(0.5, 0.95, 10):
    # Collect per-image precision and recall for each category
    precisions = {cat_id: [] for cat_id in category_ids_val}
    recalls = {cat_id: [] for cat_id in category_ids_val}
    for img_id in gt_boxes:
        gt = gt_boxes.get(img_id, [])
        pred = pred_boxes.get(img_id, [])
        # Sort predictions by confidence
        pred = sorted(pred, key=lambda x: x[2], reverse=True)
        # Group ground truth boxes by category
        gt_by_category = {cat_id: [] for cat_id in category_ids_val}
        for bbox, cat_id in gt:
            gt_by_category[cat_id].append(bbox)
        # Match predictions to ground truth for each category
        for cat_id in category_ids_val:
            cat_preds = [box for box, pred_cat_id, _ in pred if pred_cat_id == cat_id]
            cat_gt = gt_by_category[cat_id]
            tp, fp, fn = match_predictions_to_gt(cat_preds, cat_gt, threshold)
            precision, recall = calculate_precision_recall(tp, fp, fn)
            precisions[cat_id].append(precision)
            recalls[cat_id].append(recall)
    # Calculate AP for each category
    aps = {}
    for cat_id in category_ids_val:
        if precisions[cat_id] and recalls[cat_id]:
            aps[cat_id] = calculate_ap(precisions[cat_id], recalls[cat_id])
        else:
            aps[cat_id] = 0.0
    # Calculate mAP (mean Average Precision) at this threshold
    mAP = np.mean(list(aps.values()))
    apmap.append(mAP)
    # Print results for this threshold
    print(f"IoU threshold {threshold:.2f} - Mean Average Precision (mAP): {mAP}")
    print("Average Precision (AP) per category:")
    for cat_id, ap in aps.items():
        print(f"Category ID {cat_id}: {ap}")
        apres[cat_id].append(ap)
# Average over all IoU thresholds (COCO-style mAP@[0.50:0.95])
mAPres = np.mean(apmap)
print(f"Mean Average Precision (mAPRes): {mAPres}")
print("Average Precision per category (averaged over IoU thresholds):")
for cat_id, ap in apres.items():
    apmean = np.mean(ap)
    print(f"Category ID {cat_id}: {apmean}")