How to use YOLOv8's predict: modify the predict.py output to write real (pixel) label coordinates, save images and txt files, and generate an empty txt file when no foreign object is detected in an image

How to use YOLOv8's predict:

Create a new predict.py as follows:

from ultralytics import YOLO


model = YOLO("your_trained_model.pt")  # path to your trained weights
model.predict(source="datasets/images/val", save=True, save_conf=True, save_txt=True, name='output')

# source: path to the folder of images to run prediction on
# save=True: save the rendered prediction images
# save_conf=True: include the confidence score in the saved txt results
# save_txt=True: save txt results; however, stock YOLOv8 does not create a txt file for images where nothing is detected
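predict() also returns the results as Python objects, so you can inspect detections directly instead of (or in addition to) reading the saved files. A minimal sketch, assuming the same placeholder model and source paths as above:

from ultralytics import YOLO

model = YOLO("your_trained_model.pt")  # placeholder weights path
results = model.predict(source="datasets/images/val", save=True, save_conf=True, save_txt=True, name='output')

for r in results:
    # r.boxes.xyxy holds pixel-coordinate boxes, r.boxes.conf the scores,
    # r.boxes.cls the class indices; an image with no detections has len(r.boxes) == 0
    print(r.path, len(r.boxes))
    for box, conf, cls in zip(r.boxes.xyxy, r.boxes.conf, r.boxes.cls):
        print(int(cls), float(conf), [round(float(v)) for v in box])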

By default, predict writes txt coordinates in normalized xywh format, i.e. the box center point plus the predicted box width and height. To output real pixel coordinates (xyxy corners) instead, replace def save_txt() in ultralytics\engine\results.py with the following code:

def save_txt(self, txt_file, save_conf=False):
        """
        Save predictions into txt file.

        Args:
            txt_file (str): txt file path.
            save_conf (bool): save confidence score or not.
        """
        boxes = self.boxes
        masks = self.masks
        probs = self.probs
        kpts = self.keypoints
        texts = []
        if probs is not None:
            # Classify
            [texts.append(f'{probs.data[j]:.2f} {self.names[j]}') for j in probs.top5]
        elif boxes:
            # Detect/segment/pose
            for j, d in enumerate(boxes):
                c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item())
                # line = (c, *d.xywhn.view(-1))  # stock version: normalized xywh
                line = (c, conf, *d.xyxy.view(-1))  # key change: real (pixel) xyxy coordinates; the field order in the txt can also be changed here
                if masks:
                    seg = masks[j].xyn[0].copy().reshape(-1)  # reversed mask.xyn, (n,2) to (n*2)
                    line = (c, *seg)
                if kpts is not None:
                    kpt = torch.cat((kpts[j].xyn, kpts[j].conf[..., None]), 2) if kpts[j].has_visible else kpts[j].xyn
                    line += (*kpt.reshape(-1).tolist(), )
                # conf is already written as the second field above, so the stock
                # `line += (conf, ) * save_conf` append (and the trailing `line = line[:-1]`
                # trim, which crashed when save_conf=False) is dropped; this fixed
                # format assumes plain detection boxes
                texts.append('%d %.6f %d %d %d %d' % (int(line[0]), float(line[1]), int(line[2]), int(line[3]), int(line[4]), int(line[5])))

        if texts:
            Path(txt_file).parent.mkdir(parents=True, exist_ok=True)  # make directory
            with open(txt_file, 'a') as f:
                f.writelines(text + '\n' for text in texts)
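After this change each txt line has the fixed form class conf x1 y1 x2 y2, with the four coordinates in pixels. A small reading sketch (the file path is a placeholder); it also returns an empty list for the empty txt files produced below:

def read_label_file(txt_path):
    """Parse lines of the form: class conf x1 y1 x2 y2 (pixel coordinates)."""
    detections = []
    with open(txt_path) as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue  # skip blank lines
            cls, conf = int(parts[0]), float(parts[1])
            x1, y1, x2, y2 = map(int, parts[2:6])
            detections.append((cls, conf, (x1, y1, x2, y2)))
    return detections

dets = read_label_file('runs/detect/output/labels/example.txt')  # placeholder path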

To make prediction generate an empty txt file for images that contain no foreign objects, use the following approach:

In ultralytics\engine\predictor.py, find def stream_inference() and replace it with the code below. The added post-processing uses os and shutil, so make sure import os and import shutil appear at the top of predictor.py:

def stream_inference(self, source=None, model=None, *args, **kwargs):
        """Streams real-time inference on camera feed and saves results to file."""
        if self.args.verbose:
            LOGGER.info('')

        # Setup model
        if not self.model:
            self.setup_model(model)

        # Setup source every time predict is called
        self.setup_source(source if source is not None else self.args.source)

        # Check if save_dir/ label file exists
        if self.args.save or self.args.save_txt:
            (self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)

        # Warmup model
        if not self.done_warmup:
            self.model.warmup(imgsz=(1 if self.model.pt or self.model.triton else self.dataset.bs, 3, *self.imgsz))
            self.done_warmup = True

        self.seen, self.windows, self.batch, profilers = 0, [], None, (ops.Profile(), ops.Profile(), ops.Profile())
        self.run_callbacks('on_predict_start')
        for batch in self.dataset:
            self.run_callbacks('on_predict_batch_start')
            self.batch = batch
            path, im0s, vid_cap, s = batch

            # Preprocess
            with profilers[0]:
                im = self.preprocess(im0s)

            # Inference
            with profilers[1]:
                preds = self.inference(im, *args, **kwargs)

            # Postprocess
            with profilers[2]:
                self.results = self.postprocess(preds, im, im0s)
            self.run_callbacks('on_predict_postprocess_end')

            # Visualize, save, write results
            n = len(im0s)
            for i in range(n):
                self.seen += 1
                self.results[i].speed = {
                    'preprocess': profilers[0].dt * 1E3 / n,
                    'inference': profilers[1].dt * 1E3 / n,
                    'postprocess': profilers[2].dt * 1E3 / n}
                p, im0 = path[i], None if self.source_type.tensor else im0s[i].copy()
                p = Path(p)

                if self.args.verbose or self.args.save or self.args.save_txt or self.args.show:
                    s += self.write_results(i, self.results, (p, im, im0))
                if self.args.save or self.args.save_txt:
                    self.results[i].save_dir = self.save_dir.__str__()
                if self.args.show and self.plotted_img is not None:
                    self.show(p)
                if self.args.save and self.plotted_img is not None:
                    self.save_preds(vid_cap, i, str(self.save_dir / p.name))

            self.run_callbacks('on_predict_batch_end')
            yield from self.results

            # Print time (inference-only)
            if self.args.verbose:
                LOGGER.info(f'{s}{profilers[1].dt * 1E3:.1f}ms')

        # Release assets
        if isinstance(self.vid_writer[-1], cv2.VideoWriter):
            self.vid_writer[-1].release()  # release final video writer

        # Print results
        if self.args.verbose and self.seen:
            t = tuple(x.t / self.seen * 1E3 for x in profilers)  # speeds per image
            LOGGER.info(f'Speed: %.1fms preprocess, %.1fms inference, %.1fms postprocess per image at shape '
                        f'{(1, 3, *im.shape[2:])}' % t)
        if self.args.save or self.args.save_txt or self.args.save_crop:
            nl = len(list(self.save_dir.glob('labels/*.txt')))  # number of labels
            s = f"\n{nl} label{'s' * (nl > 1)} saved to {self.save_dir / 'labels'}" if self.args.save_txt else ''
            m = f"\n{nl} image{'s' * (nl > 1)} saved to {self.save_dir / 'images'}" if self.args.save_txt else ''
            LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}")
        # Source folder and the images/ and labels/ target folders for the post-processing below
        source_folder = self.save_dir
        target_folder = os.path.join(source_folder, 'images')
        target_label_folder = os.path.join(source_folder, 'labels')

        # Create the target folders (labels/ may not exist yet if save_txt is off)
        os.makedirs(target_folder, exist_ok=True)
        os.makedirs(target_label_folder, exist_ok=True)

        # List all files in the source folder
        files = os.listdir(source_folder)

        # Move every image file into the images/ subfolder
        for file in files:
            # Build the file's absolute path
            file_path = os.path.join(source_folder, file)

            # Only handle image files
            if os.path.isfile(file_path) and file.lower().endswith(('.jpg', '.jpeg', '.png')):
                # Build the target path
                target_file_path = os.path.join(target_folder, file)

                # Copy the image to the target folder, then remove the original (i.e. move it)
                shutil.copy(file_path, target_file_path)
                os.remove(file_path)

        image_folder = target_folder
        txt_folder = target_label_folder
        image_files = [file for file in os.listdir(image_folder) if file.lower().endswith(('.jpg', '.jpeg', '.png'))]
        # For every image, make sure a matching txt file exists
        for image_file in image_files:
            # Build the image and txt file paths
            image_file_path = os.path.join(image_folder, image_file)
            txt_file_path = os.path.join(txt_folder, os.path.splitext(image_file)[0] + '.txt')

            # If the txt file already exists (detections were found), leave it alone
            if os.path.isfile(txt_file_path):
                pass
            else:
                # Otherwise create an empty txt file (image with no detections)
                with open(txt_file_path, 'w') as txt_file:
                    pass


        self.run_callbacks('on_predict_end')
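If you prefer not to patch predictor.py, the empty-txt behaviour can also be reproduced with a standalone post-processing script run after prediction. A sketch, assuming the images/ and labels/ layout produced by the patched stream_inference() above (adjust the run directory to match your name= setting):

import os

run_dir = 'runs/detect/output'  # assumed run directory
image_dir = os.path.join(run_dir, 'images')
label_dir = os.path.join(run_dir, 'labels')
os.makedirs(label_dir, exist_ok=True)

for name in os.listdir(image_dir):
    if not name.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    txt_path = os.path.join(label_dir, os.path.splitext(name)[0] + '.txt')
    if not os.path.isfile(txt_path):
        # image with no detections: create an empty txt file
        open(txt_path, 'w').close()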
