前言
GitHub 上开源的 YOLOv8 采用了 supervision 包作为结果可视化的工具。这个包用起来非常方便,只需要几行代码就可以解决在源图像上画框以及打印标签这类令人头疼的问题。而且 supervision 官方也支持 YOLOv8:使用 `supervision.Detections.from_yolov8` 这个函数就可以直接将预测结果转换成 `Detections` 对象。
而在yolov5中,使用的是PIL和cv2封装的函数将预测结果处理到图像上。
可以肯定的是使用supervision包去处理更加便捷,而且更加灵活,只需要一两行代码就可以完成置信度的筛选、指定区域的检测框的筛选,指定类别的框的筛选等等。
问题描述
使用 `BoxAnnotator.annotate` 在原图像中绘制框和标签时,如果标签包含非 ASCII 字符(例如中文),绘制出来的文字会变成乱码。
问题根源
下面是目前BoxAnnotator.annotate的源码
def annotate(
    self,
    scene: np.ndarray,
    detections: Detections,
    labels: Optional[List[str]] = None,
    skip_label: bool = False,
) -> np.ndarray:
    """
    Draws bounding boxes on the frame using the detections provided.

    The boxes and labels are drawn directly onto `scene` with OpenCV calls
    and the same array is returned. Label text is rendered with
    `cv2.putText` using `cv2.FONT_HERSHEY_SIMPLEX`.

    Args:
        scene (np.ndarray): The image on which the bounding boxes will be drawn
        detections (Detections): The detections for which the bounding boxes will be drawn
        labels (Optional[List[str]]): An optional list of labels corresponding to each detection. If `labels` are not provided (or their count differs from the number of detections), corresponding `class_id` will be used as label.
        skip_label (bool): If set to `True`, skips bounding box label annotation.

    Returns:
        np.ndarray: The image with the bounding boxes drawn on it

    Example:
        ```python
        >>> import supervision as sv

        >>> classes = ['person', ...]
        >>> image = ...
        >>> detections = sv.Detections(...)

        >>> box_annotator = sv.BoxAnnotator()
        >>> labels = [
        ...     f"{classes[class_id]} {confidence:0.2f}"
        ...     for _, _, confidence, class_id, _
        ...     in detections
        ... ]
        >>> annotated_frame = box_annotator.annotate(
        ...     scene=image.copy(),
        ...     detections=detections,
        ...     labels=labels
        ... )
        ```
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    for i in range(len(detections)):
        x1, y1, x2, y2 = detections.xyxy[i].astype(int)
        class_id = (
            detections.class_id[i] if detections.class_id is not None else None
        )
        # Pick the palette entry by class id when available, otherwise by the
        # detection's position in the list.
        idx = class_id if class_id is not None else i
        color = (
            self.color.by_idx(idx)
            if isinstance(self.color, ColorPalette)
            else self.color
        )
        cv2.rectangle(
            img=scene,
            pt1=(x1, y1),
            pt2=(x2, y2),
            color=color.as_bgr(),
            thickness=self.thickness,
        )
        if skip_label:
            continue

        # Fall back to the class id whenever usable labels were not supplied.
        # When `class_id` is None as well, the rendered text is "None".
        text = (
            f"{class_id}"
            if (labels is None or len(detections) != len(labels))
            else labels[i]
        )

        # Measure the rendered text so the filled background can be sized to it.
        text_width, text_height = cv2.getTextSize(
            text=text,
            fontFace=font,
            fontScale=self.text_scale,
            thickness=self.text_thickness,
        )[0]

        # The label sits directly above the top-left corner of the box.
        text_x = x1 + self.text_padding
        text_y = y1 - self.text_padding

        text_background_x1 = x1
        text_background_y1 = y1 - 2 * self.text_padding - text_height

        text_background_x2 = x1 + 2 * self.text_padding + text_width
        text_background_y2 = y1

        cv2.rectangle(
            img=scene,
            pt1=(text_background_x1, text_background_y1),
            pt2=(text_background_x2, text_background_y2),
            color=color.as_bgr(),
            thickness=cv2.FILLED,
        )
        # NOTE(review): the rectangles above pass `as_bgr()` while the text
        # passes `as_rgb()`; for a non-grey text color the channels would
        # presumably be swapped on a BGR scene -- confirm against upstream.
        cv2.putText(
            img=scene,
            text=text,
            org=(text_x, text_y),
            fontFace=font,
            fontScale=self.text_scale,
            color=self.text_color.as_rgb(),
            thickness=self.text_thickness,
            lineType=cv2.LINE_AA,
        )
    return scene
源码中使用 `cv2.putText` 绘制标签,而目前 cv2 还没有办法很好地支持中文,所以常见的解决方法是将 cv2 替换成 PIL 来绘制文字:PIL 在绘制文字之前需要指定字体文件,只需要加载一个支持中文的字体文件即可。
解决方案
我对这个方法做了较大的修改,将 cv2 换成了 PIL。
下面是我修改后的 `/usr/local/lib/python3.8/dist-packages/supervision/detection/annotate.py` 文件:
from typing import List, Optional, Union
import cv2
import numpy as np
from supervision.detection.core import Detections
from supervision.draw.color import Color, ColorPalette
from PIL import Image, ImageFont, ImageDraw
class BoxAnnotator:
    """
    A class for drawing bounding boxes and text labels on an image using PIL.

    Labels are rendered with a font file loaded through
    `ImageFont.truetype`, so non-ASCII text (for example Chinese) can be
    displayed as long as the chosen font contains the required glyphs.

    Attributes:
        font_path (str): Path of the font file used to render labels
        line_width (int): Outline width of the bounding boxes
        fontsize (int): Size passed to `ImageFont.truetype`
    """

    # Box/label color used when no `idx2color` mapping is supplied.
    DEFAULT_COLOR = (255, 0, 0)

    def __init__(
        self,
        font_path: str,
        line_width: int = 1,
        fontsize: int = 10,
    ):
        self.font_path = font_path
        self.line_width = line_width
        self.fontsize = fontsize

    @staticmethod
    def _text_size(font, text: str) -> tuple:
        """Return the `(width, height)` needed to render `text` with `font`."""
        if hasattr(font, "getbbox"):
            # `getsize` was removed in Pillow 10. `getbbox` returns
            # (left, top, right, bottom); right/bottom correspond to the
            # (width, height) pair that `getsize` used to report.
            _, _, right, bottom = font.getbbox(text)
            return right, bottom
        # Older Pillow releases that predate `getbbox`.
        return font.getsize(text)

    def annotate(
        self,
        scene: np.ndarray,
        detections: Detections,
        idx2color: Optional[dict] = None,
        idx2label: Optional[dict] = None,
        skip_label: bool = False,
    ) -> np.ndarray:
        """
        Draws bounding boxes (and optionally labels) for the given detections.

        Colors are written into the image channels in the order given, so
        they follow the channel order of `scene`: the default `(255, 0, 0)`
        is red for an RGB array and blue for a BGR array such as one
        loaded with cv2.

        Args:
            scene (np.ndarray): The image on which the bounding boxes will be drawn
            detections (Detections): The detections for which the bounding boxes will be drawn
            idx2color (Optional[dict]): Mapping from `class_id` to the color of its box and label background. If not provided, `DEFAULT_COLOR` is used for every box.
            idx2label (Optional[dict]): Mapping from `class_id` to the label text. If not provided, `str(class_id)` is used as label.
            skip_label (bool): If set to `True`, only the boxes are drawn and the font file is not loaded.

        Returns:
            np.ndarray: A new array created from the annotated PIL image

        Raises:
            KeyError: If a mapping is provided but has no entry for a detection's `class_id`.
        """
        image = Image.fromarray(scene)
        draw = ImageDraw.Draw(image)
        # The font is only needed for labels; load it once per call.
        font = (
            None
            if skip_label
            else ImageFont.truetype(self.font_path, self.fontsize, encoding="utf-8")
        )
        class_ids = detections.class_id

        for i, box in enumerate(detections.xyxy):
            # Plain Python ints keep PIL independent of numpy scalar types.
            x1, y1, x2, y2 = box.astype(int).tolist()
            class_id = class_ids[i] if class_ids is not None else None
            color = self.DEFAULT_COLOR if idx2color is None else idx2color[class_id]

            # draw rectangle
            draw.rectangle(
                [(x1, y1), (x2, y2)],
                width=self.line_width,
                fill=None,
                outline=color,
            )

            # draw label
            if skip_label:
                continue
            text = str(class_id) if idx2label is None else idx2label[class_id]
            w, h = self._text_size(font, text)
            # Put the label above the box when it fits inside the image,
            # otherwise just inside the top edge of the box.
            outside = y1 - h >= 0
            label_top = y1 - h if outside else y1
            label_bottom = y1 + 1 if outside else y1 + h + 1
            draw.rectangle((x1, label_top, x1 + w + 1, label_bottom), fill=color)
            draw.text((x1, label_top), text, fill='white', font=font)

        return np.array(image)
class MaskAnnotator:
    """
    Blends detection masks into an image as semi-transparent colored overlays.

    Attributes:
        color (Union[Color, ColorPalette]): Either one fixed color for every mask, or a palette from which a color is picked per detection
    """

    def __init__(
        self,
        color: Union[Color, ColorPalette] = ColorPalette.default(),
    ):
        self.color: Union[Color, ColorPalette] = color

    def annotate(
        self, scene: np.ndarray, detections: Detections, opacity: float = 0.5
    ) -> np.ndarray:
        """
        Blend every detection mask into `scene` with the given opacity.

        Masks are processed in order of decreasing `detections.area`. When
        `detections.mask` is `None`, `scene` is returned as it was passed in.

        Args:
            scene (np.ndarray): The image the masks are blended into
            detections (Detections): Detections providing `mask`, `area` and optionally `class_id`
            opacity (float): Weight of the mask color in the blend; `1 - opacity` is the weight of the underlying image

        Returns:
            np.ndarray: The image with the masks overlaid
        """
        masks = detections.mask
        if masks is None:
            return scene

        class_ids = detections.class_id
        uses_palette = isinstance(self.color, ColorPalette)

        # Descending area order: the largest mask is blended first.
        for det_index in np.argsort(detections.area)[::-1]:
            label = None if class_ids is None else class_ids[det_index]
            # Palette lookup uses the class id, or the detection index when
            # no class id is available.
            palette_index = det_index if label is None else label
            fill = self.color.by_idx(palette_index) if uses_palette else self.color

            overlay = np.zeros_like(scene, dtype=np.uint8)
            overlay[:] = fill.as_bgr()

            blended = np.uint8(opacity * overlay + (1 - opacity) * scene)
            # Add a trailing axis so the 2-D mask broadcasts over the channels.
            inside_mask = masks[det_index][..., np.newaxis]
            scene = np.where(inside_mask, blended, scene)

        return scene
因为项目需求,我去掉了原函数中的一些参数,并换成了方便我自己使用的参数;这次修改主要解决的是中文标签无法正常显示的问题。
修改annotate.py文件后,再按照修改后的函数传入参数即可正常显示中文了。