高分辨率图片目标检测
YOLO 模型的默认输入尺寸为 640x640。当输入图像的分辨率高达几千x几千、甚至几万x几万时,整张图会被大幅缩小后再送入模型,小目标的细节随之丢失,导致检测不准。因此本文借鉴 YOLT 的思路,将大图裁剪成若干子图分别检测,实现高分辨率图片的目标检测。
思路为:
- 确定裁剪尺寸,一般为包裹住最大物体即可,这样在裁剪后不会遗漏完整物体。
- 确定重叠部分尺寸:相邻子图之间需要保留重叠区域,重叠尺寸一般取能使被切割的物体完整落入相邻子图的大小即可。
- 依次将裁剪得到的子图输入模型,把检测框坐标加上子图在原图中的偏移后,保存到 list。
- 遍历完整张图片后,进行NMS处理,去除重复的检测框即可。
代码:
import math
import os
import sys
import time

import cv2
import cv2 as cv
import numpy as np
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator
def intersection_over_union(boxA, boxB):
    """Return the intersection-over-union (IoU) of two boxes.

    Both boxes are indexed as ``[x1, y1, x2, y2]``. Widths and heights are
    computed with a ``+ 1`` term, i.e. the corner coordinates are treated as
    inclusive.
    """
    # Corners of the overlap rectangle.
    left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])

    # Clamp each side at zero so that disjoint boxes yield an empty overlap.
    overlap_w = max(0, right - left + 1)
    overlap_h = max(0, bottom - top + 1)
    overlap = overlap_w * overlap_h

    area_a = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    area_b = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    # Union = sum of both areas minus the part counted twice.
    union = float(area_a + area_b - overlap)
    return overlap / union
def non_max_suppression(boxes, scores, iou_threshold=0.5):
    """Greedy non-maximum suppression (NMS) over a set of boxes.

    Args:
        boxes: Sequence of boxes, each indexable as ``[x1, y1, x2, y2]``.
        scores: Optional sequence with one score per box. When given, boxes
            are visited from highest to lowest score (ties keep input
            order). When ``None``, boxes are visited in input order.
        iou_threshold: A remaining box is dropped when its IoU with the box
            just kept is strictly greater than this value.

    Returns:
        List of integer indices into ``boxes`` of the boxes that survive,
        in the order they were selected.

    Raises:
        ValueError: If ``scores`` is given but its length differs from the
            number of boxes.
    """
    boxes_arr = np.asarray(boxes, dtype=np.float64).reshape(-1, 4)
    num_boxes = boxes_arr.shape[0]
    if num_boxes == 0:
        return []

    if scores is None:
        order = np.arange(num_boxes)
    else:
        scores_arr = np.asarray(scores, dtype=np.float64).reshape(-1)
        if scores_arr.shape[0] != num_boxes:
            raise ValueError(
                f"scores has {scores_arr.shape[0]} entries but there are "
                f"{num_boxes} boxes"
            )
        # Stable sort on the negated scores: descending, ties in input order.
        order = np.argsort(-scores_arr, kind="stable")

    x1, y1, x2, y2 = boxes_arr.T
    # Areas use the inclusive-coordinate (+1) convention.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    keep_indices = []
    while order.size > 0:
        # The first remaining candidate is always kept.
        idx = order[0]
        keep_indices.append(int(idx))
        rest = order[1:]

        # IoU of the kept box against every remaining candidate at once.
        inter_w = np.maximum(
            0, np.minimum(x2[idx], x2[rest]) - np.maximum(x1[idx], x1[rest]) + 1
        )
        inter_h = np.maximum(
            0, np.minimum(y2[idx], y2[rest]) - np.maximum(y1[idx], y1[rest]) + 1
        )
        inter = inter_w * inter_h
        ious = inter / (areas[idx] + areas[rest] - inter)

        # Drop candidates that overlap the kept box by more than the threshold.
        order = rest[~(ious > iou_threshold)]
    return keep_indices
def _tile_starts(length, tile_size, stride):
    """Return the start offsets of the sliding-window tiles along one axis."""
    # Always yield at least one tile, even when the axis is shorter than a
    # tile; otherwise small images would produce no tiles at all.
    count = max(1, math.ceil((length - tile_size) / stride) + 1)
    return [k * stride for k in range(count)]


def PanoDetection(ImageName, *, weight='yolo11s.pt', tile_size=1920,
                  stride=640, conf=0.9, iou_threshold=0.4):
    """Detect objects in a large image by tiling it and merging the results.

    The image is cut into overlapping tiles, each tile is passed to the YOLO
    model, the resulting boxes are shifted back into full-image coordinates,
    and all boxes are merged with ``non_max_suppression``. The surviving
    boxes are drawn on the image, which is saved next to the input file.

    Args:
        ImageName: Path of the image to process.
        weight: Weights file passed to ``YOLO``.
        tile_size: Width and height of each tile in pixels (tiles at the
            right/bottom edge are clipped to the image).
        stride: Distance in pixels between the origins of neighbouring
            tiles; neighbouring tiles overlap by ``tile_size - stride``.
        conf: Confidence threshold passed to ``model.predict``.
        iou_threshold: IoU threshold passed to ``non_max_suppression``.

    Returns:
        Tuple ``(save_path, summary)``: the path the annotated image was
        written to and a string listing the number of kept detections for
        every class name of the model.

    Raises:
        ValueError: If ``tile_size`` or ``stride`` is not positive.
        FileNotFoundError: If OpenCV cannot read ``ImageName``.
    """
    if tile_size <= 0 or stride <= 0:
        raise ValueError("tile_size and stride must be positive")

    # cv.imread returns None instead of raising when the file is unreadable.
    orig_img = cv.imread(ImageName)
    if orig_img is None:
        raise FileNotFoundError(f"Cannot read image: {ImageName!r}")

    model = YOLO(weight)
    names = model.names
    img_h, img_w = orig_img.shape[:2]

    # Positional arguments: image, line_width, font_size, font, pil.
    annotator = Annotator(
        orig_img,
        None,
        None,
        "Arial.ttf",
        False,
        example=names,
    )

    # Detections from all tiles are collected first and merged by NMS later.
    pano_pred_boxes = []
    pano_pred_labels = []
    pano_pred_classes = []
    pano_pred_scores = []

    y_starts = _tile_starts(img_h, tile_size, stride)
    for x0 in _tile_starts(img_w, tile_size, stride):
        right = min(x0 + tile_size, img_w)
        for y0 in y_starts:
            bottom = min(y0 + tile_size, img_h)
            results = model.predict(orig_img[y0:bottom, x0:right], conf=conf)
            for d in reversed(results[0].boxes):
                class_id = int(d.cls)
                d_conf = float(d.conf)
                track_id = None if d.id is None else int(d.id.item())
                name = ("" if track_id is None else f"id:{track_id} ") + names[class_id]
                label = f"{name} {d_conf:.2f}"

                # Shift the tile-local box into full-image coordinates.
                box = d.xyxy.squeeze().clone()
                box[0] += x0
                box[1] += y0
                box[2] += x0
                box[3] += y0

                pano_pred_boxes.append(box.cpu())
                pano_pred_labels.append(label)
                pano_pred_classes.append(class_id)
                pano_pred_scores.append(d_conf)

    keep_indices = non_max_suppression(
        pano_pred_boxes, pano_pred_scores, iou_threshold=iou_threshold
    )

    # Count by class id rather than by matching names against label text,
    # so that e.g. "hot dog" is not also counted as "dog". A wide integer
    # dtype avoids wrap-around when a class has more than 255 detections.
    total_classes = np.zeros(len(names), dtype=np.int64)
    for kept in keep_indices:
        total_classes[pano_pred_classes[kept]] += 1
        annotator.box_label(
            pano_pred_boxes[kept],
            pano_pred_labels[kept],
            color=(0, 0, 255),
            rotated=False,
        )

    # splitext only strips the final extension, so dots elsewhere in the
    # path (e.g. "./images/a.jpg") are preserved.
    save_path = os.path.splitext(ImageName)[0] + 'DetectNMS.jpg'
    # result() returns the annotated image even if Annotator drew on a copy.
    cv2.imwrite(save_path, annotator.result())

    s = ''.join(
        f'{total_classes[i]}个{names[i]} ' for i in range(total_classes.shape[0])
    )
    print(s)
    return save_path, s
if __name__ == '__main__':
    # Image path: the first command-line argument when given; otherwise the
    # empty placeholder, which has to be edited before running.
    ImageName = sys.argv[1] if len(sys.argv) > 1 else ""
    # perf_counter is a monotonic clock intended for measuring intervals,
    # unlike time.time(), which can jump when the system clock is adjusted.
    time1 = time.perf_counter()
    PanoDetection(ImageName)
    time2 = time.perf_counter()
    print(time2 - time1, 's')