yolov5-切割检测-detect.py

将图像切割为8张小图,对每个小图进行目标检测后,将检测结果拼接到最后的大图。代码的工作原理:

  1. 定义了一个crop_image函数,用于将输入的大图像切割成指定大小的小图像,并返回切割后的小图像列表。

  2. 定义了一个merge_results函数,用于将检测结果合并到原始大图像上。在函数中,首先定义了类别名称列表names,然后遍历每个小图像的检测结果,将检测框绘制在原始大图像上,并添加类别和置信度信息。

  3. 定义run_and_save函数,该函数包含了整个流程的实现。首先加载模型和数据集,然后对每个输入图像进行处理。在处理过程中,将输入图像切割为多个小图像,并对每个小图像进行目标检测。最后,调用merge_results函数将检测结果合并到原始大图像上,并保存带有检测结果的图像。

  4. 最后修改了参数,指定了模型权重文件、输入图像文件夹路径、数据集描述文件路径以及结果保存目录路径等参数,并调用run_and_save函数执行整个流程。

    # 将图像切割为8张小图,对每个小图单独检测后,再将检测结果拼接到最后的大图
    
    
    import cv2
    import numpy as np
    import os
    from torchvision.transforms import functional as F
    from pathlib import Path
    
    import argparse
    import os
    import sys
    from pathlib import Path
    
    import torch
    import torch.backends.cudnn as cudnn
    
    FILE = Path(__file__).resolve()
    ROOT = FILE.parents[0]  # YOLOv5 root directory
    if str(ROOT) not in sys.path:
        sys.path.append(str(ROOT))  # add ROOT to PATH
    ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
    
    from models.common import DetectMultiBackend
    from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
    from utils.general import (
        LOGGER,
        check_file,
        check_img_size,
        check_imshow,
        check_requirements,
        colorstr,
        cv2,
        increment_path,
        non_max_suppression,
        print_args,
        scale_coords,
        strip_optimizer,
        xyxy2xywh,
    )
    from utils.plots import Annotator, colors, save_one_box
    from utils.torch_utils import select_device, time_sync
    
    
    def crop_image(image, crop_size):
        """Split an image into non-overlapping tiles of a fixed size.

        Tiles are collected row by row (left to right, then top to bottom).
        Regions at the right/bottom border that cannot hold a complete tile
        are skipped, so every returned tile has exactly the requested size.

        Args:
            image: Array indexed as ``image[y, x]``; only its first two
                dimensions (height, width) are used for tiling.
            crop_size: ``(tile_width, tile_height)`` in pixels.

        Returns:
            A list of slices of ``image`` in row-major order. The list is
            empty when the image is smaller than one tile.
        """
        print("Input image size:", image.shape[:2])
        height, width = image.shape[:2]
        tile_w, tile_h = crop_size[0], crop_size[1]

        # The tile size comes from crop_size alone; no fixed size is assumed.
        # Ending each range one full tile before the border drops partial
        # tiles without needing a per-tile size check.
        return [
            image[y : y + tile_h, x : x + tile_w]
            for y in range(0, height - tile_h + 1, tile_h)
            for x in range(0, width - tile_w + 1, tile_w)
        ]
    
    
    def merge_results(original_image, cropped_images, detections, crop_size):
        """Draw the detections of every tile onto a copy of the full image.

        Tile ``i`` is treated as the ``i``-th cell of a row-major grid of
        equally sized, non-overlapping tiles that starts at the top-left
        corner of ``original_image`` (the order in which ``crop_image``
        returns its tiles). Box coordinates are shifted by the tile's
        top-left corner before drawing.

        Args:
            original_image: Full-size image array (height, width, ...). It is
                copied, not modified.
            cropped_images: The tiles the detections were computed on. When
                non-empty, the shape of the first tile defines the grid cell
                size, so the offsets always match the tiles actually cut.
            detections: One entry per tile. ``entry[0]`` is either ``None`` or
                a 2-D tensor whose columns are read as
                x1, y1, x2, y2, confidence, class index (tile coordinates).
            crop_size: ``(tile_width, tile_height)``; only used as a fallback
                when ``cropped_images`` is empty.

        Returns:
            A copy of ``original_image`` with a rectangle and a
            "<class> <confidence>" label drawn for every detection.
        """
        names = ["cf", "sz", "tc", "os", "rd", "unknown"]
        merged_image = original_image.copy()

        if len(cropped_images) > 0:
            tile_h, tile_w = cropped_images[0].shape[:2]
        else:
            tile_w, tile_h = crop_size[0], crop_size[1]
        # Number of full tiles per row; at least 1 so the index math below
        # never divides by zero.
        cols = max(original_image.shape[1] // tile_w, 1) if tile_w else 1

        for i, cet in enumerate(detections):
            det = cet[0] if len(cet) > 0 else None
            # Check for "no detections" before touching det in any other way.
            if det is None or det.numel() == 0:
                continue

            # Top-left corner of tile i inside the full image.
            x_offset = (i % cols) * tile_w
            y_offset = (i // cols) * tile_h

            rows = det[:, :6].detach().cpu().numpy()
            for x1, y1, x2, y2, conf, cls in rows:
                # Truncate to whole pixels and use plain Python ints, which
                # every OpenCV build accepts as point coordinates.
                x1_merged = int(x1) + x_offset
                y1_merged = int(y1) + y_offset
                x2_merged = int(x2) + x_offset
                y2_merged = int(y2) + y_offset

                class_id = int(cls)
                # Fall back to the numeric id for classes missing from names.
                if 0 <= class_id < len(names):
                    class_name = names[class_id]
                else:
                    class_name = str(class_id)
                label = f"{class_name} {conf:.2f}"

                cv2.rectangle(
                    merged_image,
                    (x1_merged, y1_merged),
                    (x2_merged, y2_merged),
                    (255, 0, 0),
                    thickness=3,
                )
                cv2.putText(
                    merged_image,
                    label,
                    (x1_merged, y1_merged - 10),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.9,
                    (255, 0, 0),
                    2,
                )

        return merged_image
    
    
    def run_and_save(
        weights, source, data, imgsz, conf_thres, iou_thres, max_det, device, save_dir
    ):
        """Run tiled detection on every image of ``source`` and save the results.

        Each input image is cut into 640x640 tiles with ``crop_image``, the
        model is run on every tile, the per-tile detections are drawn back
        onto the full image with ``merge_results`` and the annotated image is
        written to ``save_dir`` as ``<input stem>.jpg``.

        Args:
            weights: Model weights passed to ``DetectMultiBackend``.
            source: Image file, directory, URL, ``.txt`` list or numeric
                stream id; converted to ``str``.
            data: Dataset description file passed to ``DetectMultiBackend``.
            imgsz: Inference size, validated against the model stride.
            conf_thres: Confidence threshold for non-max suppression.
            iou_thres: IoU threshold for non-max suppression.
            max_det: Maximum number of detections kept per tile.
            device: Device request handed to ``select_device``; an empty
                string lets it choose automatically.
            save_dir: Output directory (created if missing).
        """
        source = str(source)
        save_img = True
        tile_size = [640, 640]  # (width, height); shared by cropping and merging

        # Path.suffix keeps the leading dot, so compare against dotted extensions.
        is_file = Path(source).suffix.lower() in {
            ".jpg",
            ".jpeg",
            ".png",
            ".tif",
            ".tiff",
            ".bmp",
        }
        is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://"))
        webcam = source.isnumeric() or source.endswith(".txt") or (is_url and not is_file)
        if is_url and is_file:
            source = check_file(source)  # download

        # Directories
        save_dir = Path(save_dir)
        save_dir.mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        device = select_device(device)  # honour the caller's device request
        model = DetectMultiBackend(weights, device=device, dnn=False, data=data, fp16=False)
        stride, pt = model.stride, model.pt
        imgsz = check_img_size(imgsz, s=stride)  # check image size

        # Dataloader
        if webcam:
            cudnn.benchmark = True  # set True to speed up constant image size inference
            dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
            bs = len(dataset)  # batch_size
        else:
            dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
            bs = 1  # batch_size

        # Run inference
        model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
        total_detections = 0
        # NOTE(review): for stream sources the loader appears to yield lists of
        # frames/paths; the tiling below only handles single images — confirm
        # before relying on the webcam branch.
        for path, im, im0s, vid_cap, s in dataset:
            im0 = im0s.copy()
            cropped_images = crop_image(im0, crop_size=tile_size)
            detections = []
            for cropped_image in cropped_images:
                # The loader's original frames come from OpenCV (BGR), while
                # the model is fed RGB in the stock YOLOv5 pipeline; flip the
                # channel axis and make the result contiguous for from_numpy.
                rgb_tile = np.ascontiguousarray(cropped_image[..., ::-1])
                cropped_image_tensor = (
                    torch.from_numpy(rgb_tile)
                    .unsqueeze(0)
                    .permute(0, 3, 1, 2)
                    .to(device)
                    .float()
                    / 255.0
                )
                # Inference only: skip autograd bookkeeping.
                with torch.no_grad():
                    pred = model(cropped_image_tensor, augment=False, visualize=False)
                pred = non_max_suppression(
                    pred,
                    conf_thres,
                    iou_thres,
                    classes=None,
                    agnostic=False,
                    max_det=max_det,
                )
                detections.append(pred)

            merged_image = merge_results(
                im0, cropped_images, detections, crop_size=tile_size
            )

            # Running total of boxes over all images processed so far.
            total_detections += sum(len(item[0]) for item in detections)
            print(f"Total detections: {total_detections}")

            # Save results (image with detections)
            if save_img:
                save_path = save_dir / (Path(path).stem + ".jpg")
                cv2.imwrite(str(save_path), merged_image)
    
    
    # Adjusted parameters.
    # Change the paths below to match your own setup.
    ROOT = Path(os.getcwd())  # root directory; the current working directory is used

    # Model weights path, written as a quoted string so the script parses.
    # NOTE(review): "./onnx" looks like a placeholder — point this at the
    # actual weights file before running.
    weights = "./onnx"
    source = "./"  # path of the input image folder

    data = ROOT / "data/myvoc.yaml"  # path of the dataset description file
    save_dir = ROOT / "./output"  # results go to the "output" folder under ROOT


    imgsz = (640, 640)
    conf_thres = 0.31
    iou_thres = 0.45
    max_det = 1000
    device = ""


    run_and_save(
        weights, source, data, imgsz, conf_thres, iou_thres, max_det, device, save_dir
    )
    

  • 18
    点赞
  • 20
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值