解决supervision.BoxAnnotator中文乱码问题

本文链接：https://blog.csdn.net/CSDNJERRYYAO/article/details/131355414

文章讨论了在使用supervision包的BoxAnnotator进行图像标注时遇到的中文乱码问题。问题根源在于cv2库不支持中文字符。解决方案是将cv2替换为PIL，利用PIL的ImageFont加载支持中文的字体文件，从而正确显示中文标签。作者已对annotate.py文件进行了相应修改。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

前言

GitHub上开源的yolov8采用supervision包作为结果可视化的工具。这个包用起来非常方便，只需要几行代码就可以解决在源图像上画框、打印标签这类令人头疼的问题。而且supervision官方也支持yolov8，使用supervision.Detections.from_yolov8这个函数就可以直接将预测结果转换成Detections对象。

而在yolov5中，使用的是PIL和cv2封装的函数将预测结果处理到图像上。

可以肯定的是使用supervision包去处理更加便捷，而且更加灵活，只需要一两行代码就可以完成置信度的筛选、指定区域的检测框的筛选，指定类别的框的筛选等等。

问题描述

使用BoxAnnotator.annotate在原图像上绘制检测框和标签时，如果标签中包含非ASCII字符（例如中文），标签会显示为乱码。

问题根源

下面是目前BoxAnnotator.annotate的源码

 def annotate(
    self,
    scene: np.ndarray,
    detections: Detections,
    labels: Optional[List[str]] = None,
    skip_label: bool = False,
) -> np.ndarray:
    """
    Draws a bounding box, and optionally a text label, for every detection.

    Boxes and labels are drawn with OpenCV directly onto `scene`, and that same
    array is returned (the example below passes `image.copy()` for that reason).

    Args:
        scene (np.ndarray): The image on which the bounding boxes will be drawn
        detections (Detections): The detections for which the bounding boxes will be drawn
        labels (Optional[List[str]]): An optional list of labels corresponding to each detection. If `labels` is not provided, or its length differs from the number of detections, the corresponding `class_id` is used as the label.
        skip_label (bool): If set to `True`, skips bounding box label annotation.
    Returns:
        np.ndarray: The image with the bounding boxes drawn on it

    Example:
        ```python
        >>> import supervision as sv

        >>> classes = ['person', ...]
        >>> image = ...
        >>> detections = sv.Detections(...)

        >>> box_annotator = sv.BoxAnnotator()
        >>> labels = [
        ...     f"{classes[class_id]} {confidence:0.2f}"
        ...     for _, _, confidence, class_id, _
        ...     in detections
        ... ]
        >>> annotated_frame = box_annotator.annotate(
        ...     scene=image.copy(),
        ...     detections=detections,
        ...     labels=labels
        ... )
        ```
    """
    # Built-in OpenCV Hershey font. Labels containing non-ASCII characters
    # (e.g. Chinese) come out garbled when rendered with cv2.putText below.
    font = cv2.FONT_HERSHEY_SIMPLEX
    for i in range(len(detections)):
        x1, y1, x2, y2 = detections.xyxy[i].astype(int)
        class_id = (
            detections.class_id[i] if detections.class_id is not None else None
        )
        # Palette index: the class id when available, otherwise the
        # detection's position in the list.
        idx = class_id if class_id is not None else i
        color = (
            self.color.by_idx(idx)
            if isinstance(self.color, ColorPalette)
            else self.color
        )
        cv2.rectangle(
            img=scene,
            pt1=(x1, y1),
            pt2=(x2, y2),
            color=color.as_bgr(),
            thickness=self.thickness,
        )
        if skip_label:
            continue

        # Fall back to the class id when no usable label list was supplied.
        text = (
            f"{class_id}"
            if (labels is None or len(detections) != len(labels))
            else labels[i]
        )

        # Measure the rendered text so the background can be sized to fit it.
        text_width, text_height = cv2.getTextSize(
            text=text,
            fontFace=font,
            fontScale=self.text_scale,
            thickness=self.text_thickness,
        )[0]

        # Text sits just above the box's top-left corner, inset by the padding.
        text_x = x1 + self.text_padding
        text_y = y1 - self.text_padding

        # Filled background strip directly above the box's top edge, large
        # enough for the text plus padding on every side.
        text_background_x1 = x1
        text_background_y1 = y1 - 2 * self.text_padding - text_height

        text_background_x2 = x1 + 2 * self.text_padding + text_width
        text_background_y2 = y1

        cv2.rectangle(
            img=scene,
            pt1=(text_background_x1, text_background_y1),
            pt2=(text_background_x2, text_background_y2),
            color=color.as_bgr(),
            thickness=cv2.FILLED,
        )
        # NOTE(review): the box and background use `as_bgr()` while the text
        # color uses `as_rgb()`; if `scene` is BGR, the text's red and blue
        # channels are presumably swapped for non-gray colors -- confirm.
        cv2.putText(
            img=scene,
            text=text,
            org=(text_x, text_y),
            fontFace=font,
            fontScale=self.text_scale,
            color=self.text_color.as_rgb(),
            thickness=self.text_thickness,
            lineType=cv2.LINE_AA,
        )
    return scene

源码中使用cv2.putText绘制标签。cv2.putText只能使用OpenCV内置的Hershey字体，这些字体不包含中文字形，因此中文等非ASCII字符无法正常显示，目前cv2还没有办法很好地支持中文。常见的解决方法是把文字绘制部分从cv2替换成PIL：PIL在绘制文字之前需要指定字体文件，只要加载一个支持中文的字体文件即可。

解决方案

我对这一个方法做了很大的修改，将cv2换成了PIL
下面是我将/usr/local/lib/python3.8/dist-packages/supervision/detection/annotate.py修改后的文件

from typing import List, Optional, Union

import cv2
import numpy as np

from supervision.detection.core import Detections
from supervision.draw.color import Color, ColorPalette
from PIL import Image, ImageFont, ImageDraw

class BoxAnnotator:
    """
    A class for drawing bounding boxes on an image using detections provided.

    Drawing is done with PIL instead of OpenCV so that labels are rendered
    with a TrueType font loaded from `font_path`. Labels containing non-ASCII
    characters (e.g. Chinese) display correctly as long as the font file
    provides the required glyphs.

    Attributes:
        font_path (str): Path of the font file used to render labels
        line_width (int): Outline width of the bounding boxes
        fontsize (int): Font size passed to `ImageFont.truetype`
    """

    # Color used when no `idx2color` mapping (or no entry for a class) exists.
    DEFAULT_COLOR = (255, 0, 0)

    def __init__(
        self,
        font_path: str,
        line_width: int = 1,
        fontsize: int = 10
    ):
        self.font_path = font_path
        self.line_width = line_width
        self.fontsize = fontsize
        # Lazily loaded font and the (path, size) pair it was loaded for.
        self._font = None
        self._font_key = None

    def _get_font(self):
        """Return the label font, reloading only if the path or size changed."""
        key = (self.font_path, self.fontsize)
        if self._font is None or self._font_key != key:
            # Pillow's `encoding` argument selects a font charmap and defaults
            # to Unicode; "utf-8" is not one of its documented values, so the
            # default is used here.
            self._font = ImageFont.truetype(self.font_path, self.fontsize)
            self._font_key = key
        return self._font

    @staticmethod
    def _text_size(font, text: str):
        """Return the (width, height) of `text` on both old and new Pillow."""
        if hasattr(font, "getbbox"):
            # `getsize` was removed in Pillow 10. The right/bottom edges of the
            # bounding box are the same values `getsize` used to report.
            _, _, right, bottom = font.getbbox(text)
            return right, bottom
        return font.getsize(text)

    def annotate(
        self,
        scene: np.ndarray,
        detections: Detections,
        idx2color: Optional[dict] = None,
        idx2label: Optional[dict] = None,
        skip_label: bool = False
    ) -> np.ndarray:
        """
        Draws bounding boxes, and optionally labels, for the detections provided.

        Args:
            scene (np.ndarray): The image on which the bounding boxes will be drawn
            detections (Detections): The detections for which the bounding boxes will be drawn
            idx2color (Optional[dict]): Mapping from `class_id` to a PIL color. Classes without an entry, or all classes when the mapping is `None`, use `DEFAULT_COLOR`.
            idx2label (Optional[dict]): Mapping from `class_id` to the label text. Classes without an entry, or all classes when the mapping is `None`, are labelled with `str(class_id)`.
            skip_label (bool): If set to `True`, only the boxes are drawn and the font is never loaded.
        Returns:
            np.ndarray: A new array holding the annotated image

        NOTE(review): color tuples are written into the array's channels in
        order. If `scene` is a BGR image (OpenCV convention), the default
        `(255, 0, 0)` presumably shows up as blue -- confirm against callers.
        """
        image = Image.fromarray(scene)
        draw = ImageDraw.Draw(image)
        font = None if skip_label else self._get_font()
        class_ids = detections.class_id

        for i, box in enumerate(detections.xyxy):
            # Plain Python ints keep PIL's coordinate handling predictable.
            x1, y1, x2, y2 = box.astype(int).tolist()
            class_id = class_ids[i] if class_ids is not None else None

            color = self.DEFAULT_COLOR
            if idx2color is not None:
                color = idx2color.get(class_id, self.DEFAULT_COLOR)

            draw.rectangle(
                [(x1, y1), (x2, y2)],
                width=self.line_width,
                fill=None,
                outline=color,
            )

            if skip_label:
                continue

            text = str(class_id)
            if idx2label is not None:
                text = idx2label.get(class_id, text)

            w, h = self._text_size(font, text)
            # Put the label above the box when it fits inside the image,
            # otherwise just below the box's top edge.
            outside = y1 - h >= 0
            label_top = y1 - h if outside else y1
            label_bottom = y1 + 1 if outside else y1 + h + 1
            draw.rectangle((x1, label_top, x1 + w + 1, label_bottom), fill=color)
            draw.text((x1, label_top), text, fill='white', font=font)

        return np.array(image)


class MaskAnnotator:
    """
    Blends the masks of the provided detections into an image.

    Attributes:
        color (Union[Color, ColorPalette]): Either a single color applied to every mask, or a palette from which one color is picked per detection
    """

    def __init__(
        self,
        color: Union[Color, ColorPalette] = ColorPalette.default(),
    ):
        self.color: Union[Color, ColorPalette] = color

    def annotate(
        self, scene: np.ndarray, detections: Detections, opacity: float = 0.5
    ) -> np.ndarray:
        """
        Tints the masked pixels of `scene` with each detection's color.

        Masks are processed from the largest area to the smallest, so smaller
        masks are blended after, and therefore on top of, larger ones.

        Args:
            scene (np.ndarray): The image to overlay the masks on
            detections (Detections): The detections whose masks are overlaid
            opacity (float): Weight of the mask color in the blend; the original pixel is weighted by `1 - opacity`
        Returns:
            np.ndarray: The image with the masks overlaid. When the detections carry no masks, the input `scene` itself is returned.
        """
        masks = detections.mask
        if masks is None:
            return scene

        class_ids = detections.class_id
        largest_first = np.argsort(detections.area)[::-1]

        for position in largest_first:
            # Palette index: the class id when there is one, otherwise the
            # detection's own position.
            palette_index = position
            if class_ids is not None and class_ids[position] is not None:
                palette_index = class_ids[position]

            if isinstance(self.color, ColorPalette):
                fill = self.color.by_idx(palette_index)
            else:
                fill = self.color

            # Solid-color image with the same shape as the scene.
            overlay = np.empty_like(scene, dtype=np.uint8)
            overlay[...] = fill.as_bgr()

            blended = np.uint8(opacity * overlay + (1 - opacity) * scene)
            # Take the blended pixel only where the mask is set.
            inside = masks[position][..., np.newaxis]
            scene = np.where(inside, blended, scene)

        return scene