windows上COCO数据集绘制bbox脚本(无需cocoapi) + 补充网格和Anchor + IoU计算

在 Windows 上折腾 COCO API 有点儿麻烦:顺利的话嗖的一下就装好了,不顺利的话会踩一堆坑。我不想再折腾了,干脆直接读取它的 json 标注文件,再根据图片的文件名找到对应的标注来画框。以下是 Python 脚本:

"""
Created on Sat May 15 14:17:42 2021

@author: Ryan
"""

import os
import json
import numpy as np
import cv2 # 注意用 cv2 不能有中文路径, 有的话建议用下边 cv_imread 那个函数

def cv_imread(file_name):
    """Read an image whose path may contain non-ASCII (e.g. Chinese) characters.

    The file is loaded as a flat uint8 buffer with numpy and decoded in
    memory, which works around cv2 not reading such paths directly.
    Technique taken from:
    https://www.zhihu.com/question/67157462/answer/251754530

    :param file_name: path of the image file
    :return: whatever ``cv2.imdecode`` produces for that buffer with flag -1
    """
    raw_bytes = np.fromfile(file_name, dtype=np.uint8)
    return cv2.imdecode(raw_bytes, -1)


# Adjust the paths below for your own machine.
json_path = r"F:\COCO\annotations_trainval2017\annotations\instances_train2017.json" # pick the train or val annotation file as needed

img_path = r"F:\COCO\train2017" # pick the train or val image directory as needed
img_name = "000000001757.jpg"

with open(json_path, encoding="utf-8") as f: # read explicitly as UTF-8 (the author notes that omitting it may raise an error)
    all_coco_ann = json.load(f)


# -----------------------------------------------------------------------------
# --------------------- COCO 标注的大字典里找那张图片的信息 ---------------------
# -----------------------------------------------------------------------------
def get_COCO_img_info(img_name, all_coco_ann):
    """Find the image record for *img_name* in the COCO annotation dict.

    :param img_name: value to compare against each record's 'file_name'
    :param all_coco_ann: annotation dict with an "images" list of records
    :return: the first record whose 'file_name' equals img_name,
             or False when no record matches
    """
    matching = (
        record
        for record in all_coco_ann["images"]
        if record['file_name'] == img_name
    )
    # next() with a default stops at the first hit and yields False on a miss.
    return next(matching, False)

# Look up the metadata record of the image we want to draw on.
img_info = get_COCO_img_info(img_name, all_coco_ann)
if not img_info:
    # get_COCO_img_info signals "not found" with False; stop here with a clear
    # message instead of failing on the subscript below with
    # "TypeError: 'bool' object is not subscriptable".
    raise ValueError(
        "image {!r} was not found in the 'images' list of the annotation file".format(img_name)
    )
img_id = img_info['id'] # the author notes that the id is effectively part of the file name


# -----------------------------------------------------------------------------
# --------------------- COCO 标注的大字典里找那张图片的标注 ---------------------
# -----------------------------------------------------------------------------
def get_COCO_img_anno(img_id, all_coco_ann):
    """Collect every annotation that belongs to one image.

    :param img_id: value to compare against each annotation's 'image_id'
    :param all_coco_ann: annotation dict with an "annotations" list
    :return: list of matching annotation records in their original order;
             an empty list when nothing matches
    """
    return [
        record
        for record in all_coco_ann["annotations"]
        if record['image_id'] == img_id
    ]

# Collect every annotation record that belongs to the selected image.
ann_list = get_COCO_img_anno(img_id, all_coco_ann)


# -----------------------------------------------------------------------------
# ------------------------- 获取你想要的类别的类别 id  ------------------------
# -----------------------------------------------------------------------------
def get_categories_needed(category, all_coco_ann):
    """Build id<->name lookup tables for the requested categories.

    :param category: one category name (str) or a collection of names
    :param all_coco_ann: annotation dict with a "categories" list whose
                         records carry 'id' and 'name'
    :return: (cls_id2name, cls_name2id) restricted to the requested names
    """
    # A bare string is treated as a single-element list of names.
    wanted = [category] if isinstance(category, str) else category

    selected = [
        (entry['id'], entry['name'])
        for entry in all_coco_ann["categories"]
        if entry['name'] in wanted
    ]

    cls_id2name = {cls_id: cls_name for cls_id, cls_name in selected}
    cls_name2id = {cls_name: cls_id for cls_id, cls_name in selected}
    return cls_id2name, cls_name2id

# Categories to keep; here only 'person'.
category = ['person']
cls_id2name, cls_name2id = get_categories_needed(category, all_coco_ann)



# -----------------------------------------------------------------------------
# ---------------------- 根据已选择的类别挑选已获得的标注  ----------------------
# -----------------------------------------------------------------------------
def get_ann_needed(ann_list, cls_id2name):
    """Keep only the annotations whose category is one of the wanted ones.

    :param ann_list: annotation records carrying 'category_id' and 'bbox'
    :param cls_id2name: dict mapping wanted category ids to their names
    :return: list of (category name, bbox) tuples, in ann_list order
    """
    return [
        (cls_id2name[record['category_id']], record['bbox'])
        for record in ann_list
        if record['category_id'] in cls_id2name
    ]

# (category name, bbox) pairs of the selected image, restricted to the chosen categories.
ann_needed = get_ann_needed(ann_list, cls_id2name)



# -----------------------------------------------------------------------------
# -------------------------------- 读图绘制bbox  -------------------------------
# -----------------------------------------------------------------------------
def drawBbox(img_array, ann_needed):
    """Draw the given bounding boxes on a copy of the image and display it.

    :param img_array: image array; it is copied first, so the array passed
                      in is not drawn on
    :param ann_needed: iterable of (name, (x_left_top, y_left_top, w, h))
    :return: the copy with the rectangles drawn on it
    """
    # The original author stresses this copy: without it the drawing calls
    # would operate directly on the caller's image.
    canvas = img_array.copy()

    box_color = (0, 255, 0)  # could be varied per category
    thickness = 3
    for _name, box in ann_needed:
        left, top, box_w, box_h = box
        top_left = (int(left), int(top))
        bottom_right = (int(left + box_w), int(top + box_h))
        canvas = cv2.rectangle(canvas, top_left, bottom_right, box_color, thickness)

    import matplotlib.pyplot as plt
    # plt.figure(dpi=1000)
    # Reverse the channel axis for display (presumably BGR -> RGB; confirm
    # against how the image was loaded).
    plt.imshow(canvas[:, :, ::-1])
    plt.show()

    return canvas

# Load the image through cv_imread and show it with its bounding boxes.
img_array = cv_imread(os.path.join(img_path, img_name))
drawBbox(img_array, ann_needed)

这是那个图片:
在这里插入图片描述

这是画出 bbox 之后的效果:
在这里插入图片描述

再补一个画网格和画Anchor的函数

# -----------------------------------------------------------------------------
# --------------------------------- 图片画网格  --------------------------------
# -----------------------------------------------------------------------------
def drawGrid(img_array, zuo=15, xia=15):
    """Draw an evenly spaced grid on the image and display it.

    No copy is made here: the lines are drawn on the array that was passed in.

    :param img_array: image array with three dimensions (height, width, channels)
    :param zuo: number of rows to split the image into
    :param xia: number of columns to split the image into
    :return: the image array with the grid lines drawn
    """
    height, width, _ = img_array.shape

    row_step = height / zuo
    col_step = width / xia

    # zuo - 1 horizontal separators followed by xia - 1 vertical separators.
    row_ys = [int(row_step * k) for k in range(1, zuo)]
    col_xs = [int(col_step * k) for k in range(1, xia)]
    segments = [[(0, y), (width, y)] for y in row_ys]
    segments += [[(x, 0), (x, height)] for x in col_xs]

    grid_color = (65, 183, 105)  # adjust colour / thickness as needed
    for start, end in segments:
        img_array = cv2.line(img_array, start, end, grid_color, 1)

    import matplotlib.pyplot as plt
    plt.figure(dpi=1000)
    plt.imshow(img_array[:, :, ::-1])
    plt.show()

    return img_array

# Reload the image, draw the default 15 x 15 grid on it and save the result.
img_array = cv_imread(os.path.join(img_path, img_name))
img_array_grid = drawGrid(img_array)

cv2.imwrite("img_array_grid.png", img_array_grid)



# -----------------------------------------------------------------------------
# -------------------------------- 绘制单个锚框  -------------------------------
# -----------------------------------------------------------------------------
def drawAnchorBox(img_array, anchor_center, w=100, h=100):
    """Draw a single anchor box centred on a point and display the image.

    No copy is made here: the rectangle is drawn on the array passed in.

    :param img_array: image array to draw on
    :param anchor_center: centre of the anchor box as (x, y)
    :param w: box width
    :param h: box height
    :return: the image array with the rectangle drawn
    """
    center_x, center_y = anchor_center
    top_left = (int(center_x - w / 2), int(center_y - h / 2))
    bottom_right = (int(center_x + w / 2), int(center_y + h / 2))

    anchor_color = (255, 0, 25)
    img_array = cv2.rectangle(img_array, top_left, bottom_right, anchor_color, 2)

    import matplotlib.pyplot as plt
    plt.figure(dpi=1000)
    plt.imshow(img_array[:, :, ::-1])
    plt.show()

    return img_array



# img_array_grid = drawAnchorBox(img_array, (250, 250))



# -----------------------------------------------------------------------------
# -------------------------------- 绘制多个锚框  -------------------------------
# -----------------------------------------------------------------------------
def drawAnchorBoxes(img_array, anchor_center, anchor_boxes):
    """Draw several anchor boxes and display the image.

    Centres and sizes are paired position by position with zip, so surplus
    entries in the longer list are ignored. No copy is made here: the
    rectangles are drawn on the array passed in.

    :param img_array: image array to draw on
    :param anchor_center: [(x, y), (x, y), ...] box centres
    :param anchor_boxes: [(width, height), (width, height), ...] box sizes
    :return: the image array with all rectangles drawn
    """
    anchor_color = (255, 0, 25)
    for center, size in zip(anchor_center, anchor_boxes):
        (center_x, center_y), (box_w, box_h) = center, size
        top_left = (int(center_x - box_w / 2), int(center_y - box_h / 2))
        bottom_right = (int(center_x + box_w / 2), int(center_y + box_h / 2))
        img_array = cv2.rectangle(img_array, top_left, bottom_right, anchor_color, 2)

    import matplotlib.pyplot as plt
    plt.figure(dpi=1000)
    plt.imshow(img_array[:, :, ::-1])
    plt.show()

    return img_array


# anchor_center = [(50, 30), (300, 50), (120, 300)]
# anchor_boxes  = [(150, 30), (60, 50), (120, 30)]

# Three anchors sharing one centre point, each with a different (width, height).
anchor_center = [(191, 234), (191, 234), (191, 234)]
anchor_boxes  = [(200, 200), (150, 260), (260, 150)]

# NOTE(review): img_array was already passed to drawGrid above, which draws
# without copying, so the grid lines are presumably still on this image --
# confirm that drawing the anchors on top of the grid is intended.
img_array_grid = drawAnchorBoxes(img_array, anchor_center, anchor_boxes)

在这里插入图片描述

这是绘制了Anchor的,Anchor是随便编的
在这里插入图片描述
这个是正经Anchor:
在这里插入图片描述

再补充一个计算IoU的:

这个是摘自博客:
https://blog.csdn.net/futangxiang4793/article/details/104482365
写的挺好的

def IoU(bbox, gt):
    """Compute the pairwise IoU between two sets of boxes.

    Boxes are (x1, y1, x2, y2) with inclusive corner coordinates, i.e. a
    box's width is ``x2 - x1 + 1``. Only the first four columns are read,
    so extra trailing columns (e.g. a score) are ignored.

    :param bbox: (n, 4) array-like of boxes
    :param gt:   (m, 4) array-like of boxes
    :return:     (n, m) float np.ndarray, result[i, j] = IoU(bbox[i], gt[j])

    How the broadcasting works: numpy aligns shapes from the last axis
    backwards, and an axis of length 1 is repeated to match the other
    operand, e.g.
        (4,3,2) with (3,2)  ->  (3,2) is expanded to (4,3,2)
        (4,1,2) with (3,2)  ->  both are expanded to (4,3,2)
    Indexing with None inserts such a length-1 axis:
        a.shape == (3, 2)  ->  a[:, None, :].shape == (3, 1, 2)

    Adapted from:
    https://blog.csdn.net/futangxiang4793/article/details/104482365
    """
    # Work in float64: accepts plain lists, and avoids wrap-around of
    # "rb - lt" for unsigned inputs and overflow of the area products for
    # narrow integer dtypes.
    bbox = np.asarray(bbox, dtype=np.float64)
    gt = np.asarray(gt, dtype=np.float64)

    lt = np.maximum(bbox[:, None, :2], gt[:, :2])    # intersection left-top (x, y)
    rb = np.minimum(bbox[:, None, 2:4], gt[:, 2:4])  # intersection right-bottom (x, y)
    wh = np.maximum(rb - lt + 1, 0)                  # intersection (w, h), 0 when disjoint
    inter_areas = wh[:, :, 0] * wh[:, :, 1]          # shape: (n, m)

    box_areas = (bbox[:, 2] - bbox[:, 0] + 1) * (bbox[:, 3] - bbox[:, 1] + 1)
    gt_areas = (gt[:, 2] - gt[:, 0] + 1) * (gt[:, 3] - gt[:, 1] + 1)

    union_areas = box_areas[:, None] + gt_areas - inter_areas
    return inter_areas / union_areas
  • 2
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值