高分辨率图片目标检测

再也不玩炉石了

已于 2025-04-11 14:16:57 修改

阅读量348

点赞数 5

文章标签：目标检测 计算机视觉 opencv

于 2025-04-11 14:15:16 首次发布

本文链接：https://blog.csdn.net/cnmdsbmdzz/article/details/147142656

版权

高分辨率图片目标检测

YOLO 模型的默认输入尺寸为 640x640。当输入图像的分辨率高达几千x几千、甚至几万x几万时，整张图片被缩放到该尺寸后，小目标的细节会丢失，导致检测不准。因此本文借鉴 YOLT 的思路，通过带重叠的滑窗裁剪来实现高分辨率图片的目标检测。
思路为：

1. 确定裁剪尺寸：一般取能够完整包住最大物体的尺寸即可，这样裁剪后的子图不会遗漏完整物体。
2. 确定重叠部分尺寸：一般取能够让被子图边界切割的物体完整出现在相邻子图中的尺寸即可。
3. 依次将裁剪子图输入模型，把得到的检测框加上子图在原图中的偏移后保存到 list。
4. 遍历完整张图片后，对所有检测框进行 NMS 处理，去除重叠区域产生的重复检测框即可。

在这里插入图片描述
代码：

import math
import os
import time

import cv2
import cv2 as cv
import numpy as np
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator


def intersection_over_union(boxA, boxB):
    """Return the intersection-over-union (IoU) of two boxes.

    Each box is indexed as ``(x1, y1, x2, y2)``. Extents are measured as
    ``x2 - x1 + 1`` and ``y2 - y1 + 1``, i.e. both corner coordinates are
    counted as belonging to the box.
    """
    ax1, ay1, ax2, ay2 = boxA[0], boxA[1], boxA[2], boxA[3]
    bx1, by1, bx2, by2 = boxB[0], boxB[1], boxB[2], boxB[3]

    # Extent of the overlap rectangle on each axis, clamped at zero when
    # the boxes are disjoint along that axis.
    overlap_w = max(0, min(ax2, bx2) - max(ax1, bx1) + 1)
    overlap_h = max(0, min(ay2, by2) - max(ay1, by1) + 1)
    overlap = overlap_w * overlap_h

    area_a = (ax2 - ax1 + 1) * (ay2 - ay1 + 1)
    area_b = (bx2 - bx1 + 1) * (by2 - by1 + 1)

    # Union = sum of both areas minus the part counted twice.
    union = float(area_a + area_b - overlap)
    return overlap / union


def non_max_suppression(boxes, scores, iou_threshold=0.5):
    """Greedy non-maximum suppression (NMS) over a list of boxes.

    Boxes are visited from the highest to the lowest score; every remaining
    box whose IoU with the currently selected box is greater than
    ``iou_threshold`` is discarded.

    Args:
        boxes: Sequence of boxes, each indexable as ``(x1, y1, x2, y2)``
            (lists, tuples, NumPy arrays or CPU tensors).
        scores: Optional sequence with one score per box. When ``None`` the
            boxes are processed in the order they were given, so callers
            that already sorted their boxes can skip passing scores.
        iou_threshold: Boxes overlapping a kept box by more than this IoU
            are suppressed.

    Returns:
        List of integer indices into ``boxes`` of the boxes that are kept,
        in the order they were selected.

    Raises:
        ValueError: If ``scores`` is given but its length differs from
            ``len(boxes)``.
    """
    num_boxes = len(boxes)
    if num_boxes == 0:
        return []

    # Convert box by box so that every element type that supports
    # ``np.asarray`` is accepted; only the first four values are used.
    coords = np.stack(
        [np.asarray(box, dtype=np.float64).reshape(-1)[:4] for box in boxes]
    )

    if scores is None:
        order = np.arange(num_boxes)
    else:
        score_values = np.asarray([float(s) for s in scores], dtype=np.float64)
        if score_values.shape[0] != num_boxes:
            raise ValueError(
                f"got {score_values.shape[0]} scores for {num_boxes} boxes"
            )
        # Stable sort on the negated scores: descending order, ties keep
        # their original relative order.
        order = np.argsort(-score_values, kind="stable")

    # Same inclusive-pixel (+1) area convention as intersection_over_union.
    areas = (coords[:, 2] - coords[:, 0] + 1) * (coords[:, 3] - coords[:, 1] + 1)

    keep_indices = []
    while order.size > 0:
        best = order[0]
        keep_indices.append(int(best))
        rest = order[1:]
        if rest.size == 0:
            break

        # IoU of the selected box against all remaining candidates at once.
        left = np.maximum(coords[best, 0], coords[rest, 0])
        top = np.maximum(coords[best, 1], coords[rest, 1])
        right = np.minimum(coords[best, 2], coords[rest, 2])
        bottom = np.minimum(coords[best, 3], coords[rest, 3])
        inter = np.clip(right - left + 1, 0, None) * np.clip(bottom - top + 1, 0, None)
        ious = inter / (areas[best] + areas[rest] - inter)

        # Keep only the candidates that do not exceed the threshold.
        order = rest[~(ious > iou_threshold)]

    return keep_indices

def _tile_starts(length, tile_size, stride):
    """Return the start offsets of the sliding windows along one image axis."""
    # An axis shorter than one tile still needs exactly one (clipped) window;
    # without the max() such an axis would produce no windows at all.
    count = max(1, math.ceil((length - tile_size) / stride) + 1)
    return [k * stride for k in range(count)]


def PanoDetection(ImageName, weight='yolo11s.pt', tile_size=1920, stride=640,
                  conf=0.9, iou_threshold=0.4):
    """Detect objects in a high-resolution image by tiling it.

    The image is cut into ``tile_size`` x ``tile_size`` windows whose
    origins are ``stride`` pixels apart (neighbouring windows therefore
    overlap by ``tile_size - stride`` pixels; windows at the right/bottom
    edge are clipped to the image). Every window is run through the YOLO
    model, the boxes are shifted back into full-image coordinates, and
    duplicates produced by the overlaps are removed with a single NMS pass
    over all boxes (regardless of class). The kept boxes are drawn on the
    image, which is written next to the input file.

    Args:
        ImageName: Path of the image to process.
        weight: Model weights passed to ``YOLO``.
        tile_size: Side length in pixels of each window.
        stride: Distance in pixels between neighbouring window origins.
        conf: Confidence threshold passed to ``model.predict``.
        iou_threshold: IoU threshold used by the final NMS pass.

    Returns:
        Tuple ``(save_path, summary)`` where ``save_path`` is the path of
        the annotated image and ``summary`` is a string listing the number
        of kept detections for every class known to the model.

    Raises:
        ValueError: If ``tile_size`` or ``stride`` is not positive.
        FileNotFoundError: If the image cannot be read.
        OSError: If the annotated image cannot be written.
    """
    if tile_size <= 0 or stride <= 0:
        raise ValueError("tile_size and stride must be positive")

    # cv2.imread signals failure by returning None instead of raising.
    orig_img = cv2.imread(ImageName)
    if orig_img is None:
        raise FileNotFoundError(f"cannot read image: {ImageName!r}")

    model = YOLO(weight)
    names = model.names
    height, width = orig_img.shape[:2]

    # One entry per detection: (confidence, class id, label, box), with the
    # box already expressed in full-image coordinates.
    detections = []
    for x0 in _tile_starts(width, tile_size, stride):
        x1 = min(x0 + tile_size, width)
        for y0 in _tile_starts(height, tile_size, stride):
            y1 = min(y0 + tile_size, height)
            results = model.predict(orig_img[y0:y1, x0:x1], conf=conf)
            for d in results[0].boxes:
                cls_id, d_conf = int(d.cls), float(d.conf)
                track_id = None if d.id is None else int(d.id.item())
                name = ("" if track_id is None else f"id:{track_id} ") + names[cls_id]
                label = f"{name} {d_conf:.2f}"
                box = d.xyxy.squeeze().clone()
                # Shift the box from window coordinates to image coordinates.
                box[0] += x0
                box[1] += y0
                box[2] += x0
                box[3] += y0
                detections.append((d_conf, cls_id, label, box.cpu()))

    # Put the most confident detections first so that, among overlapping
    # duplicates, NMS keeps the best one even if it only looks at box order.
    detections.sort(key=lambda det: det[0], reverse=True)
    keep_indices = non_max_suppression(
        [det[3] for det in detections],
        [det[0] for det in detections],
        iou_threshold=iou_threshold,
    )

    annotator = Annotator(
        orig_img,
        line_width=None,
        font_size=None,
        font="Arial.ttf",
        pil=False,
        example=names,
    )
    # Count by class id: matching class names against the label text would
    # also count classes whose name is a substring of another class name.
    class_counts = dict.fromkeys(names, 0)
    for idx in keep_indices:
        _, cls_id, label, box = detections[idx]
        class_counts[cls_id] += 1
        annotator.box_label(box, label, color=(0, 0, 255), rotated=False)

    # splitext only strips the final extension, so dots elsewhere in the
    # path (e.g. "./images/a.jpg") are left alone.
    save_path = os.path.splitext(ImageName)[0] + 'DetectNMS.jpg'
    # Save what the annotator actually drew on rather than assuming it
    # modified orig_img in place.
    if not cv2.imwrite(save_path, annotator.result()):
        raise OSError(f"cannot write image: {save_path!r}")

    s = ''.join(f'{class_counts[key]}个{names[key]} ' for key in names)
    print(s)
    return save_path, s


if __name__ == '__main__':
    # Script entry point: run the tiled detection once and report the
    # wall-clock time it took. ImageName is an empty placeholder here and
    # has to be set to the path of the image to process.
    started = time.time()
    ImageName = ""
    PanoDetection(ImageName)
    print(time.time() - started, 's')