yolov5-切割检测-detect.py

算法贝多芬

已于 2024-02-20 19:55:29 修改

阅读量942

点赞数 20

文章标签：目标跟踪 人工智能 计算机视觉 python yolov5 图像处理

于 2024-02-20 19:54:35 首次发布

本文链接：https://blog.csdn.net/m0_56127259/article/details/136197628

版权

文章介绍了如何使用YOLOv5模型对大图像进行分割，将图像切割为8个小图进行目标检测，然后将检测结果合并回原图的过程，涉及crop_image,merge_results和run_and_save函数的详细实现。

摘要由CSDN通过智能技术生成

将图像切割为8张小图，对每个小图进行目标检测后，将检测结果拼接到最后的大图。代码的工作原理：

定义了一个crop_image函数，用于将输入的大图像切割成指定大小的小图像，并返回切割后的小图像列表。
定义了一个merge_results函数，用于将检测结果合并到原始大图像上。在函数中，首先定义了类别名称列表names，然后遍历每个小图像的检测结果，将检测框绘制在原始大图像上，并添加类别和置信度信息。
定义run_and_save函数，该函数包含了整个流程的实现。首先加载模型和数据集，然后对每个输入图像进行处理。在处理过程中，将输入图像切割为多个小图像，并对每个小图像进行目标检测。最后，调用merge_results函数将检测结果合并到原始大图像上，并保存带有检测结果的图像。

最后修改了参数，指定了模型权重文件、输入图像文件夹路径、数据集描述文件路径以及结果保存目录路径等参数，并调用run_and_save函数执行整个流程。

# Split the image into 8 tiles, run detection on each tile separately, then stitch the detections back onto the full image


import cv2
import numpy as np
import os
from torchvision.transforms import functional as F
from pathlib import Path

import argparse
import os
import sys
from pathlib import Path

import torch
import torch.backends.cudnn as cudnn

FILE = Path(__file__).resolve()  # absolute path of this script
ROOT = FILE.parents[0]  # YOLOv5 root directory (the directory containing this script)
if str(ROOT) not in sys.path:
    # Must happen before the `models` / `utils` imports that follow, so those
    # packages are resolved relative to this script's directory.
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from models.common import DetectMultiBackend
from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
from utils.general import (
    LOGGER,
    check_file,
    check_img_size,
    check_imshow,
    check_requirements,
    colorstr,
    cv2,
    increment_path,
    non_max_suppression,
    print_args,
    scale_coords,
    strip_optimizer,
    xyxy2xywh,
)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import select_device, time_sync


def crop_image(image, crop_size):
    """Split an image into non-overlapping tiles of exactly ``crop_size``.

    Tiles are taken row by row (left to right, then top to bottom), starting
    at the top-left corner. Only full tiles are returned: a strip at the right
    or bottom edge that is narrower/shorter than ``crop_size`` is dropped.

    Args:
        image: Array indexed as ``image[y, x, ...]`` whose ``shape[:2]`` is
            ``(height, width)``, e.g. an OpenCV image.
        crop_size: ``(width, height)`` of each tile, in pixels.

    Returns:
        List of tiles in row-major order. Each tile is a slice of ``image``
        (for NumPy arrays that is a view, not a copy). The list is empty when
        the image is smaller than one tile.
    """
    print("Input image size:", image.shape[:2])
    height, width = image.shape[:2]
    tile_w, tile_h = crop_size[0], crop_size[1]
    # Stop early enough that every tile lies fully inside the image, so each
    # slice already has the requested size and needs no further shape check.
    # (The previous check against a hard-coded (512, 512) rejected every tile
    # of any other size, e.g. the 640x640 tiles requested by the caller.)
    return [
        image[y : y + tile_h, x : x + tile_w]
        for y in range(0, height - tile_h + 1, tile_h)
        for x in range(0, width - tile_w + 1, tile_w)
    ]


def merge_results(original_image, cropped_images, detections, crop_size):
    """Draw per-tile detections onto a copy of the full image.

    Tile ``i`` is assumed to be the ``i``-th tile of a row-major grid of
    non-overlapping, equally sized tiles anchored at the top-left corner of
    ``original_image``. Box coordinates of each detection are shifted by the
    tile's top-left corner before drawing.

    Args:
        original_image: Full image as an array with ``shape[:2] == (height, width)``.
            It is not modified.
        cropped_images: The tiles the detections were computed on. When
            non-empty, the first tile's shape defines the tile size.
        detections: One entry per tile; ``entry[0]`` holds that tile's
            detections as rows of ``(x1, y1, x2, y2, conf, cls)`` in tile
            pixel coordinates (a torch tensor or array-like), or ``None``.
        crop_size: ``(width, height)`` of a tile; used only when
            ``cropped_images`` is empty.

    Returns:
        A copy of ``original_image`` with a rectangle and a
        ``"<class> <confidence>"`` label drawn for every detection.
    """
    names = ["cf", "sz", "tc", "os", "rd", "unknown"]
    merged_image = original_image.copy()

    # The tiles that were actually produced are authoritative for the grid
    # geometry; the crop_size argument is only a fallback.
    if len(cropped_images) > 0:
        tile_h, tile_w = cropped_images[0].shape[:2]
    else:
        tile_w, tile_h = crop_size[0], crop_size[1]
    # Number of full tiles per image row. This replaces a lookup table that
    # was only valid for a 2560x1280 image cut into 640x640 tiles.
    tiles_per_row = max(original_image.shape[1] // tile_w, 1)

    for i, cet in enumerate(detections):
        det = cet[0]
        print(f"Index: {i}, Value: {det}")
        if det is None:  # must be checked before len(det)
            continue
        print("det长度:", len(det))
        if len(det) == 0:
            continue

        # Top-left and bottom-right corner of tile i in full-image coordinates.
        x1_crop = (i % tiles_per_row) * tile_w
        y1_crop = (i // tiles_per_row) * tile_h
        print(x1_crop, y1_crop)
        x2_crop, y2_crop = x1_crop + tile_w, y1_crop + tile_h
        print(x2_crop, y2_crop)

        # Accept torch tensors (moved to host memory first) as well as
        # anything NumPy can convert directly.
        if hasattr(det, "detach"):
            rows = det.detach().cpu().numpy()
        else:
            rows = np.asarray(det)

        offset = np.array([x1_crop, y1_crop])
        for row in rows:
            xyxy = row[:4].astype(int)
            conf = float(row[4])
            cls = int(row[5])

            # Convert tile coordinates to full-image coordinates; plain ints
            # keep OpenCV's point parsing independent of NumPy scalar types.
            x1_merged, y1_merged = (int(v) for v in xyxy[:2] + offset)
            x2_merged, y2_merged = (int(v) for v in xyxy[2:] + offset)

            # Fall back to the numeric class id when the model reports a
            # class this name list does not cover.
            class_name = names[cls] if 0 <= cls < len(names) else str(cls)
            label = f"{class_name} {conf:.2f}"
            cv2.rectangle(
                merged_image,
                (x1_merged, y1_merged),
                (x2_merged, y2_merged),
                (255, 0, 0),
                thickness=3,
            )
            cv2.putText(
                merged_image,
                label,
                (x1_merged, y1_merged - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.9,
                (255, 0, 0),
                2,
            )

    return merged_image


def run_and_save(
    weights, source, data, imgsz, conf_thres, iou_thres, max_det, device, save_dir
):
    """Run tiled detection over ``source`` and save one annotated image per input.

    Each input image is cut into 640x640 tiles with ``crop_image``, every tile
    is passed through the model and non-max suppression on its own, and the
    per-tile boxes are drawn back onto the full image with ``merge_results``.
    The annotated image is written to ``save_dir/<input stem>.jpg``.

    Args:
        weights: Model weights passed to ``DetectMultiBackend``.
        source: Image file, directory, URL, stream address or ``.txt`` list.
        data: Dataset description file passed to ``DetectMultiBackend``.
        imgsz: Inference size, validated against the model stride and used
            for warmup.
        conf_thres: Confidence threshold for non-max suppression.
        iou_thres: IoU threshold for non-max suppression.
        max_det: Maximum number of detections kept per tile.
        device: Device specifier handed to ``select_device``; an empty string
            lets it choose.
        save_dir: Output directory (``str`` or ``Path``); created if missing.
    """
    source = str(source)
    save_img = True
    # Path.suffix keeps the leading dot, so compare against dotted extensions.
    is_file = Path(source).suffix.lower() in {
        ".jpg",
        ".jpeg",
        ".png",
        ".tif",
        ".tiff",
        ".bmp",
    }
    is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://"))
    webcam = source.isnumeric() or source.endswith(".txt") or (is_url and not is_file)
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)  # honour the caller's device request
    model = DetectMultiBackend(weights, device=device, dnn=False, data=data, fp16=False)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        # NOTE(review): stream loaders appear to yield a list of frames per
        # step, while the tiling loop below expects a single image array —
        # confirm before using stream sources.
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        bs = 1  # batch_size

    # Run inference
    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
    # One tile size for both cropping and merging, so box offsets always
    # match the tiles the boxes were computed on.
    crop_size = [640, 640]
    total_detections = 0
    for path, im, im0s, vid_cap, s in dataset:
        im0 = im0s.copy()
        cropped_images = crop_image(im0, crop_size=crop_size)
        detections = []
        with torch.no_grad():  # inference only; no autograd graph needed
            for cropped_image in cropped_images:
                # im0 comes from OpenCV and is BGR; the model takes RGB, as the
                # loader's own preprocessed `im` does. Reversing the channel
                # axis yields a negative-stride view, hence the contiguous copy.
                rgb_tile = np.ascontiguousarray(cropped_image[:, :, ::-1])
                cropped_image_tensor = (
                    torch.from_numpy(rgb_tile)
                    .unsqueeze(0)
                    .permute(0, 3, 1, 2)
                    .to(device)
                    .float()
                    / 255.0
                )
                pred = model(cropped_image_tensor, augment=False, visualize=False)
                pred = non_max_suppression(
                    pred,
                    conf_thres,
                    iou_thres,
                    classes=None,
                    agnostic=False,
                    max_det=max_det,
                )
                detections.append(pred)

        merged_image = merge_results(
            im0, cropped_images, detections, crop_size=crop_size
        )
        # Running total over all images processed so far.
        total_detections += sum(len(item[0]) for item in detections)

        print(f"Total detections: {total_detections}")  # total number of predicted boxes

        # Save results (image with detections)
        if save_img:
            save_path = save_dir / (Path(path).stem + ".jpg")
            if not cv2.imwrite(str(save_path), merged_image):
                print(f"Failed to write {save_path}")


# Run configuration.
# Adjust the paths below to match your environment.
ROOT = Path(os.getcwd())  # root directory; the current working directory is used

# NOTE(review): the original assignment was truncated to `(./onnx )`, which is
# a syntax error. The visible text is kept verbatim as a string; point it at
# the actual model weights file before running — TODO confirm intended path.
weights = "./onnx"
source = "./"  # path to the folder of input images

data = ROOT / "data/myvoc.yaml"  # path to the dataset description file
save_dir = ROOT / "./output"  # results go to the "output" folder under ROOT (the working directory)


imgsz = (640, 640)
conf_thres = 0.31
iou_thres = 0.45
max_det = 1000
device = ""


run_and_save(
    weights, source, data, imgsz, conf_thres, iou_thres, max_det, device, save_dir
)

算法贝多芬

关注

20
点赞
踩
24

收藏

觉得还不错? 一键收藏
0
评论
yolov5-切割检测-detect.py

run_and_save函数，该函数包含了整个流程的实现。在处理过程中，将输入图像切割为多个小图像，并对每个小图像进行目标检测。最后修改了参数，指定了模型权重文件、输入图像文件夹路径、数据集描述文件路径以及结果保存目录路径等参数，并调用run_and_save函数执行整个流程。将图像切割为8张小图，对每个小图进行目标检测后，将检测结果拼接到最后的大图。merge_results函数首先定义类别名称列表names，然后遍历每个小图像的检测结果，将检测框绘制在原始大图像上，并添加类别和置信度信息。crop_image函数，用于将输入的大图像切割成指定大小的小图像，并返回切割后的小图像列表。最后调用merge_results函数将检测结果合并到原始大图像上，并保存带有检测结果的图像。
复制链接

扫一扫