将图像切割为8张小图,对每个小图进行目标检测后,将检测结果拼接到最后的大图。代码的工作原理:

- 定义了一个 `crop_image` 函数,用于将输入的大图像切割成指定大小的小图像,并返回切割后的小图像列表。
- 定义了一个 `merge_results` 函数,用于将检测结果合并到原始大图像上。在函数中,首先定义了类别名称列表 `names`,然后遍历每个小图像的检测结果,将检测框绘制在原始大图像上,并添加类别和置信度信息。
- 定义 `run_and_save` 函数,该函数包含了整个流程的实现。首先加载模型和数据集,然后对每个输入图像进行处理。在处理过程中,将输入图像切割为多个小图像,并对每个小图像进行目标检测。最后,调用 `merge_results` 函数将检测结果合并到原始大图像上,并保存带有检测结果的图像。
- 最后修改了参数,指定了模型权重文件、输入图像文件夹路径、数据集描述文件路径以及结果保存目录路径等参数,并调用 `run_and_save`
# (Continuation of the description above: the script finally calls
# run_and_save to execute the whole pipeline.)
#
# Split the image into tiles (8 tiles for a 2560x1280 input at 640x640), run
# detection on every tile separately, then stitch the detections back onto the
# full-size image.
import cv2
import numpy as np
import os
from torchvision.transforms import functional as F
from pathlib import Path
import argparse
import sys

import torch
import torch.backends.cudnn as cudnn

FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from models.common import DetectMultiBackend
from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
from utils.general import (
    LOGGER,
    check_file,
    check_img_size,
    check_imshow,
    check_requirements,
    colorstr,
    cv2,
    increment_path,
    non_max_suppression,
    print_args,
    scale_coords,
    strip_optimizer,
    xyxy2xywh,
)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import select_device, time_sync


def _tile_origins(width, height, crop_size):
    """Return the (x, y) top-left corner of every full tile, row by row.

    Shared by ``crop_image`` and ``merge_results`` so that the i-th tile and
    the i-th offset always refer to the same region of the image.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        crop_size: ``(tile_width, tile_height)`` in pixels.

    Returns:
        List of ``(x, y)`` tuples. Tiles that would stick out past the right
        or bottom edge are not included.

    Raises:
        ValueError: If either tile dimension is not positive.
    """
    tile_w, tile_h = int(crop_size[0]), int(crop_size[1])
    if tile_w <= 0 or tile_h <= 0:
        raise ValueError(f"crop_size must be positive, got {crop_size!r}")
    return [
        (x, y)
        for y in range(0, height - tile_h + 1, tile_h)
        for x in range(0, width - tile_w + 1, tile_w)
    ]


def crop_image(image, crop_size):
    """Cut ``image`` into non-overlapping tiles of ``crop_size``.

    Args:
        image: Array indexed as ``image[y, x, ...]``.
        crop_size: ``(tile_width, tile_height)`` in pixels.

    Returns:
        List of tiles (slices of ``image``) in row-major order. Only full-size
        tiles are returned; leftover strips at the right/bottom are skipped.
    """
    print("Input image size:", image.shape[:2])
    height, width = image.shape[:2]
    tile_w, tile_h = int(crop_size[0]), int(crop_size[1])
    # The original compared every tile against a fixed (512, 512), which
    # rejected all tiles whenever another crop size (e.g. 640) was requested.
    return [
        image[y : y + tile_h, x : x + tile_w]
        for x, y in _tile_origins(width, height, crop_size)
    ]


def merge_results(original_image, cropped_images, detections, crop_size):
    """Draw per-tile detections onto a copy of the full-size image.

    Args:
        original_image: The full image the tiles were cut from.
        cropped_images: The tiles; unused here, kept for interface
            compatibility.
        detections: One entry per tile, in the order produced by
            ``crop_image``. Each entry is indexed with ``[0]`` to get that
            tile's detections, whose rows are read as
            ``x1, y1, x2, y2, conf, cls`` in tile coordinates.
        crop_size: ``(tile_width, tile_height)`` that was used for cropping.

    Returns:
        A copy of ``original_image`` with boxes and labels drawn on it.
    """
    names = ["cf", "sz", "tc", "os", "rd", "unknown"]
    merged_image = original_image.copy()
    height, width = original_image.shape[:2]
    tile_w, tile_h = int(crop_size[0]), int(crop_size[1])
    # Offsets are derived from the image and tile size instead of a fixed
    # table, so any image size / tile size combination works.
    origins = _tile_origins(width, height, crop_size)

    for i, cet in enumerate(detections):
        det = cet[0]
        print(f"Index: {i}, Value: {det}")
        print("det长度:", 0 if det is None else len(det))
        if det is None or det.numel() == 0:  # nothing detected in this tile
            continue

        x1_crop, y1_crop = origins[i]  # top-left corner of the tile
        print(x1_crop, y1_crop)
        x2_crop, y2_crop = x1_crop + tile_w, y1_crop + tile_h  # bottom-right
        print(x2_crop, y2_crop)

        offset = np.array([x1_crop, y1_crop])
        for box, conf, cls in zip(det[:, :4], det[:, 4], det[:, 5]):
            xyxy = box.detach().cpu().numpy().astype(int)
            # Shift from tile coordinates to full-image coordinates.
            x1_merged, y1_merged = (int(v) for v in xyxy[:2] + offset)
            x2_merged, y2_merged = (int(v) for v in xyxy[2:] + offset)

            cls_idx = int(cls)
            # Fall back to the numeric id when the model reports a class that
            # is not in the local name table.
            cls_name = names[cls_idx] if 0 <= cls_idx < len(names) else str(cls_idx)
            label = f"{cls_name} {conf:.2f}"
            cv2.rectangle(
                merged_image,
                (x1_merged, y1_merged),
                (x2_merged, y2_merged),
                (255, 0, 0),
                thickness=3,
            )
            cv2.putText(
                merged_image,
                label,
                (x1_merged, y1_merged - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.9,
                (255, 0, 0),
                2,
            )
    return merged_image


def run_and_save(
    weights, source, data, imgsz, conf_thres, iou_thres, max_det, device, save_dir
):
    """Run tiled detection on every input image and save the annotated result.

    Args:
        weights: Model weights passed to ``DetectMultiBackend``.
        source: Input path / URL / stream id passed to the dataloader.
        data: Dataset description file passed to ``DetectMultiBackend``.
        imgsz: Inference size, validated with ``check_img_size``.
        conf_thres: Confidence threshold for non-max suppression.
        iou_thres: IoU threshold for non-max suppression.
        max_det: Maximum number of detections kept per tile.
        device: Device string handed to ``select_device`` ("" lets it choose).
        save_dir: Directory where ``<image stem>.jpg`` files are written.
    """
    source = str(source)
    save_img = True
    # Compare the full suffix (with the dot); the original stripped the dot
    # and then compared against dotted extensions, so it never matched.
    is_file = Path(source).suffix.lower() in (
        ".jpg",
        ".jpeg",
        ".png",
        ".tif",
        ".tiff",
        ".bmp",
    )
    is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://"))
    webcam = source.isnumeric() or source.endswith(".txt") or (is_url and not is_file)
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)  # make dir

    # Load model; honour the caller's device request instead of ignoring it.
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=False, data=data, fp16=False)
    stride, pt = model.stride, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        bs = 1  # batch_size

    # One tile size for both cropping and merging so the offsets line up.
    crop_size = [640, 640]

    # Run inference
    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
    total_detections = 0
    for path, im, im0s, vid_cap, s in dataset:
        # NOTE(review): this assumes im0s is a single image array; the stream
        # loader appears to yield a list of frames -- confirm before using
        # webcam/stream sources.
        im0 = im0s.copy()
        cropped_images = crop_image(im0, crop_size=crop_size)

        detections = []
        with torch.no_grad():  # inference only, no autograd bookkeeping
            for cropped_image in cropped_images:
                # OpenCV images are BGR while the model expects RGB; reversing
                # the channel axis yields negative strides, so copy first.
                rgb_tile = np.ascontiguousarray(cropped_image[:, :, ::-1])
                cropped_image_tensor = (
                    torch.from_numpy(rgb_tile)
                    .unsqueeze(0)
                    .permute(0, 3, 1, 2)  # NHWC -> NCHW
                    .to(device)
                    .float()
                    / 255.0
                )
                pred = model(cropped_image_tensor, augment=False, visualize=False)
                pred = non_max_suppression(
                    pred,
                    conf_thres,
                    iou_thres,
                    classes=None,
                    agnostic=False,
                    max_det=max_det,
                )
                detections.append(pred)

        merged_image = merge_results(
            im0, cropped_images, detections, crop_size=crop_size
        )

        # Accumulate the running total (the original printed a counter that
        # was never updated).
        total_detections += sum(len(item[0]) for item in detections)
        print(f"Total detections: {total_detections}")  # running number of boxes

        # Save results (image with detections)
        if save_img:
            save_path = save_dir / (Path(path).stem + ".jpg")
            if not cv2.imwrite(str(save_path), merged_image):
                LOGGER.warning(f"Failed to write {save_path}")


# Adjusted parameters -- change the paths below to match your own setup.
ROOT = Path(os.getcwd())  # root directory; the current working directory is used
# TODO: point this at the real weights file. The original line was the
# syntactically invalid `(./onnx )`, presumably a placeholder path.
weights = "./onnx"
source = "./"  # path of the input image folder
data = ROOT / "data/myvoc.yaml"  # path of the dataset description file
save_dir = ROOT / "./output"  # results go to ./output under the working directory
imgsz = (640, 640)
conf_thres = 0.31
iou_thres = 0.45
max_det = 1000
device = ""

if __name__ == "__main__":
    run_and_save(
        weights, source, data, imgsz, conf_thres, iou_thres, max_det, device, save_dir
    )