# YOLOv8-Seg推理详解及部署实现

33 篇文章 143 订阅

### 一、2024/1/10更新

def iou(box1, box2):
def area_box(box):
return (box[2] - box[0]) * (box[3] - box[1])

left, top = max(box1[:2], box2[:2])
right, bottom = min(box1[2:4], box2[2:4])
...


def iou(box1, box2):
def area_box(box):
return (box[2] - box[0]) * (box[3] - box[1])
# box -> [x1,y1,x2,y2,...]
left   = max(box1[0], box2[0])
top    = max(box1[1], box2[1])
right  = min(box1[2], box2[2])
bottom = min(box1[3], box2[3])
...


### 一、YOLOv8-Seg推理(Python)

#### 1. YOLOv8-Seg预测

import cv2
import numpy as np
from ultralytics import YOLO

def hsv2bgr(h, s, v):
    """Convert an HSV color (all components in [0, 1]) to a BGR tuple in [0, 255]."""
    sector = int(h * 6)
    frac = h * 6 - sector
    p = v * (1 - s)
    q = v * (1 - frac * s)
    t = v * (1 - (1 - frac) * s)

    # One (r, g, b) triple per 60-degree hue sector; anything outside
    # sectors 0..5 falls back to black, matching the original if/elif chain.
    rgb_by_sector = {
        0: (v, t, p),
        1: (q, v, p),
        2: (p, v, t),
        3: (p, q, v),
        4: (t, p, v),
        5: (v, p, q),
    }
    r, g, b = rgb_by_sector.get(sector, (0, 0, 0))
    return int(b * 255), int(g * 255), int(r * 255)

def random_color(id):
    """Deterministically derive a BGR color from an integer id."""
    # XOR with fixed magic numbers scatters consecutive ids across distinct
    # hue/saturation buckets (0.00 .. 0.99); value is fixed at full brightness.
    hue = (((id << 2) ^ 0x937151) % 100) / 100.0
    saturation = (((id << 3) ^ 0x315793) % 100) / 100.0
    return hsv2bgr(hue, saturation, 1)

if __name__ == "__main__":

    # NOTE(review): `img` is used below but never defined in this excerpt;
    # presumably an `img = cv2.imread(...)` line was dropped during
    # extraction -- confirm against the original article/repo.
    model = YOLO("yolov8s-seg.pt")

    # Run prediction; model(img) returns a list of Results, one per image.
    result = model(img)[0]
    names = result.names                 # class-id -> class-name mapping
    boxes = result.boxes.data.tolist()   # each row: [x1, y1, x2, y2, conf, label]

    h, w = img.shape[:2]

    # NOTE(review): `i` is undefined here -- these three statements look like
    # the body of the per-instance mask loop (see the commented-out loop
    # below) left behind when the excerpt was trimmed.
    label = int(boxes[i][5])
    color = np.array(random_color(label))

    # Solid-color canvas used to overlay/blend an instance mask.
    colored_mask = (np.ones((h, w, 3)) * color).astype(np.uint8)

    # for i, points in enumerate(masks.xy):
    #     label = int(boxes[i][5])
    #     color = random_color(label)
    #     points = np.array(points, np.int32)
    #     cv2.drawContours(img, [points], -1, color, 2)

    # Draw one rectangle + caption per detection.
    for obj in boxes:
        left, top, right, bottom = int(obj[0]), int(obj[1]), int(obj[2]), int(obj[3])
        confidence = obj[4]
        label = int(obj[5])
        color = random_color(label)
        cv2.rectangle(img, (left, top), (right, bottom), color = color ,thickness=2, lineType=cv2.LINE_AA)
        caption = f"{names[label]} {confidence:.2f}"
        # NOTE(review): this rebinds the image-size `w, h` from above to the
        # caption's text size -- harmless only because they are not reused
        # after the loop.
        w, h = cv2.getTextSize(caption, 0, 1, 2)[0]
        cv2.rectangle(img, (left - 3, top - 33), (left + w + 10, top), color, -1)
        cv2.putText(img, caption, (left, top - 5), 0, 1, (0, 0, 0), 2, 16)

    cv2.imwrite("predict-seg.jpg", img)
    print("save done")


#### 2. YOLOv8-Seg预处理

def preprocess(self, im):
    """
    Prepare an input batch for inference.

    Args:
        im (torch.Tensor | List(np.ndarray)): BCHW tensor, or a list of HWC images.

    Returns:
        torch.Tensor: fp16/fp32 BCHW tensor on the model device, scaled to
        [0, 1] when the input arrived as raw uint8 images.
    """
    is_tensor = isinstance(im, torch.Tensor)
    if not is_tensor:
        # letterbox each image, stack into a batch, BGR->RGB, BHWC->BCHW
        batch = np.stack(self.pre_transform(im))
        batch = batch[..., ::-1].transpose((0, 3, 1, 2))
        im = torch.from_numpy(np.ascontiguousarray(batch))

    im = im.to(self.device)
    im = im.half() if self.model.fp16 else im.float()  # uint8 -> fp16/32
    if not is_tensor:
        im /= 255  # 0 - 255 to 0.0 - 1.0
    return im


• self.pre_transform：即 letterbox 添加灰条
• im[…,::-1]：BGR → RGB
• transpose((0, 3, 1, 2))：添加 batch 维度，HWC → CHW
• torch.from_numpy：to Tensor
• im /= 255：除以 255，归一化

def preprocess_warpAffine(image, dst_width=640, dst_height=640):
    """
    Letterbox-style resize implemented as a single affine warp.

    Scales the image to fit dst_width x dst_height while preserving aspect
    ratio, centers it on a gray (114) canvas, and returns the normalized
    1x3xHxW tensor plus the inverse transform for mapping boxes back.
    """
    src_h, src_w = image.shape[:2]
    scale = min(dst_width / src_w, dst_height / src_h)
    # translation that centers the scaled image on the destination canvas
    ox = (dst_width - scale * src_w) / 2
    oy = (dst_height - scale * src_h) / 2
    M = np.array([[scale, 0, ox],
                  [0, scale, oy]], dtype=np.float32)

    img_pre = cv2.warpAffine(
        image, M, (dst_width, dst_height),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(114, 114, 114),
    )
    IM = cv2.invertAffineTransform(M)  # maps network coords back to the image

    # BGR -> RGB, scale to [0, 1], HWC -> 1x3xHxW
    img_pre = (img_pre[..., ::-1] / 255.0).astype(np.float32)
    img_pre = torch.from_numpy(img_pre.transpose(2, 0, 1)[None])
    return img_pre, IM


warpAffine 预处理方法将图像分辨率固定在 640x640，主要有以下几点考虑：(from chatGPT)

• 简化处理逻辑：所有预处理后的图像分辨率相同，可以简化 CUDA 中并行处理的逻辑，使得代码更易于编写和维护。
• 优化内存访问：在 GPU 上，连续的内存访问模式通常比非连续的访问更高效。如果所有图像具有相同的大小和布局，这可以帮助优化内存访问，提高处理速度。
• 避免动态内存分配：动态内存分配和释放是昂贵的操作，特别是在 GPU 上。固定分辨率意味着可以预先分配足够的内存，而不需要根据每个图像的大小动态调整内存大小。

#### 3. YOLOv8-Seg后处理

class SegmentationPredictor(DetectionPredictor):
    """
    A class extending the DetectionPredictor class for prediction based on a segmentation model.

    Example:
        python
        from ultralytics.utils import ASSETS
        from ultralytics.models.yolo.segment import SegmentationPredictor

        args = dict(model='yolov8n-seg.pt', source=ASSETS)
        predictor = SegmentationPredictor(overrides=args)
        predictor.predict_cli()

    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """Initializes the SegmentationPredictor with the provided configuration, overrides, and callbacks."""
        super().__init__(cfg, overrides, _callbacks)

    def postprocess(self, preds, img, orig_imgs):
        """Applies non-max suppression and processes detections for each image in an input batch."""
        # NMS over the detection head output; nc is passed so the 32 mask
        # coefficients trailing the class scores are carried through intact.
        p = ops.non_max_suppression(preds[0],
                                    self.args.conf,
                                    self.args.iou,
                                    agnostic=self.args.agnostic_nms,
                                    max_det=self.args.max_det,
                                    nc=len(self.model.names),
                                    classes=self.args.classes)

        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

        results = []
        proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
        for i, pred in enumerate(p):
            orig_img = orig_imgs[i]
            img_path = self.batch[0][i]
            # Rescale boxes from network-input space back to original image space.
            if not len(pred):  # save empty boxes
                pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
            else:
                pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
            # NOTE(review): both branches above are identical and `results` is
            # returned empty -- this excerpt appears truncated. Upstream
            # ultralytics builds masks via ops.process_mask(proto[i], ...) and
            # appends a Results object per image here; confirm against the
            # ultralytics source before relying on this snippet.
        return results


• ops.non_max_suppression：非极大值抑制，即 NMS
• ops.scale_boxes：框的解码，即 decode boxes

def iou(box1, box2):
    """IoU of two [x1, y1, x2, y2, ...] boxes; 0 when they do not overlap."""

    def _area(b):
        return (b[2] - b[0]) * (b[3] - b[1])

    # intersection rectangle (clamped to zero width/height when disjoint)
    ix1 = max(box1[0], box2[0])
    iy1 = max(box1[1], box2[1])
    ix2 = min(box1[2], box2[2])
    iy2 = min(box1[3], box2[3])
    inter = max(ix2 - ix1, 0) * max(iy2 - iy1, 0)
    union = _area(box1) + _area(box2) - inter
    # guard against degenerate (zero-area) boxes as well as no overlap
    if inter == 0 or union == 0:
        return 0
    return inter / union

def NMS(boxes, iou_thres):
    """Greedy per-class NMS; assumes boxes are sorted by confidence (desc).

    Each box is [x1, y1, x2, y2, conf, label, ...]; a box is dropped when a
    higher-confidence box of the same label overlaps it above iou_thres.
    """
    suppressed = [False] * len(boxes)
    kept = []
    for i, box_i in enumerate(boxes):
        if suppressed[i]:
            continue
        kept.append(box_i)
        for j in range(i + 1, len(boxes)):
            box_j = boxes[j]
            # only boxes of the same class suppress each other
            if suppressed[j] or box_i[5] != box_j[5]:
                continue
            if iou(box_i, box_j) > iou_thres:
                suppressed[j] = True
    return kept

def postprocess(pred, conf_thres=0.25, iou_thres=0.45):
    """Decode raw YOLOv8-Seg head output into NMS-filtered boxes.

    pred is 1x8400x116, each row [cx, cy, w, h, 80 class scores, 32 mask
    coefficients]. Returns rows [x1, y1, x2, y2, conf, label, 32 coeffs],
    confidence-filtered, sorted by confidence, run through class-wise NMS.
    """
    candidates = []
    for row in pred[0]:
        # best class among the scores between the box and the mask coeffs
        label = row[4:-32].argmax()
        confidence = row[4 + label]
        if confidence < conf_thres:
            continue
        # cxcywh -> xyxy
        cx, cy, w, h = row[:4]
        half_w, half_h = w * 0.5, h * 0.5
        candidates.append([cx - half_w, cy - half_h,
                           cx + half_w, cy + half_h,
                           confidence, label, *row[-32:]])

    candidates.sort(key=lambda b: b[4], reverse=True)
    return NMS(candidates, iou_thres)

# boxes -> n, 4         检测框，映射到 160x160 尺寸下的
x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(n,1,1)
r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # rows shape(1,1,w)
c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # cols shape(1,h,1)

return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))

# protos   -> 32, 160, 160 分割头输出
# bboxes   -> n, 4         检测框
# shape    -> 640, 640     输入网络中的图像 shape
# upsample 一个 bool 值，表示是否需要上采样 masks 到图像的原始形状
c, mh, mw = protos.shape  # CHW
ih, iw = shape
# 矩阵相乘 nx32 @ 32x(160x160) -> nx(160x160) -> sigmoid -> nx160x160

downsampled_bboxes = bboxes.clone()
downsampled_bboxes[:, 0] *= mw / iw
downsampled_bboxes[:, 2] *= mw / iw
downsampled_bboxes[:, 3] *= mh / ih
downsampled_bboxes[:, 1] *= mh / ih

if upsample:


$$
\begin{aligned}
8400 \times 116 &= 80 \times 80 \times 116 + 40 \times 40 \times 116 + 20 \times 20 \times 116 \\
&= 80 \times 80 \times (84 + 32) + 40 \times 40 \times (84 + 32) + 20 \times 20 \times (84 + 32) \\
&= 80 \times 80 \times (4 + 80 + 32) + 40 \times 40 \times (4 + 80 + 32) + 20 \times 20 \times (4 + 80 + 32)
\end{aligned}
$$

• protos：分割头的输出，形状为 32x160x160
• bboxes：检测框，形状为 nx4
• shape：输入网络中的图像大小 640x640

• 然后使用 sigmoid 函数将原始 masks 的值映射到 [0,1] 之间的概率值
• 接着会根据图像的 shape 将边界框的大小缩放到 160x160 上
• 如果 upsample 为 True，则使用双线性插值将 masks 上采样到图像的原始形状（640x640）
• 最后使用 gt_(0.5) 将 masks 的值映射到 {0, 1}。大于 0.5 的部分我们认为这个像素有超过 50% 的概率属于前景（目标物体），因此设置为 1；反之，如果概率小于等于 0.5，我们则认为该像素是背景，设置为 0

#### 4. YOLOv8-Seg推理

import cv2
import torch
import numpy as np
import torch.nn.functional as F
from ultralytics.data.augment import LetterBox
from ultralytics.nn.autobackend import AutoBackend

def preprocess_letterbox(image):
    """Letterbox to 640 (stride 32), then return a normalized 1x3xHxW float tensor."""
    boxed = LetterBox(new_shape=640, stride=32, auto=True)(image=image)
    # BGR -> RGB and [0, 255] -> [0.0, 1.0]
    boxed = (boxed[..., ::-1] / 255.0).astype(np.float32)
    # HWC -> BCHW with a singleton batch dimension
    return torch.from_numpy(boxed.transpose(2, 0, 1)[None])

def preprocess_warpAffine(image, dst_width=640, dst_height=640):
    """
    Aspect-preserving resize to dst_width x dst_height via one affine warp.

    The image is scaled and centered on a gray (114) canvas; returns the
    normalized 1x3xHxW tensor and the inverse transform IM used later to
    map detections back to original image coordinates.
    """
    src_h, src_w = image.shape[:2]
    scale = min(dst_width / src_w, dst_height / src_h)
    # center the scaled content on the destination canvas
    offset_x = (dst_width - scale * src_w) / 2
    offset_y = (dst_height - scale * src_h) / 2
    M = np.array([[scale, 0, offset_x],
                  [0, scale, offset_y]], dtype=np.float32)

    warped = cv2.warpAffine(
        image, M, (dst_width, dst_height),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(114, 114, 114),
    )
    IM = cv2.invertAffineTransform(M)  # dst -> src mapping for box decode

    # BGR -> RGB, [0,255] -> [0,1], HWC -> 1x3xHxW
    warped = (warped[..., ::-1] / 255.0).astype(np.float32)
    img_pre = torch.from_numpy(warped.transpose(2, 0, 1)[None])
    return img_pre, IM

def iou(box1, box2):
    """Intersection-over-union of two [x1, y1, x2, y2, ...] boxes."""

    def _area(b):
        return (b[2] - b[0]) * (b[3] - b[1])

    # overlap rectangle, clamped so disjoint boxes yield zero area
    overlap_w = max(min(box1[2], box2[2]) - max(box1[0], box2[0]), 0)
    overlap_h = max(min(box1[3], box2[3]) - max(box1[1], box2[1]), 0)
    inter = overlap_w * overlap_h
    union = _area(box1) + _area(box2) - inter
    # zero-area or non-overlapping boxes score 0 rather than dividing by 0
    return 0 if inter == 0 or union == 0 else inter / union

def NMS(boxes, iou_thres):
    """Class-wise greedy NMS over confidence-sorted boxes.

    boxes rows are [x1, y1, x2, y2, conf, label, ...]; whenever a kept box
    overlaps a later same-label box above iou_thres, the later one is removed.
    """
    dropped = [False] * len(boxes)
    survivors = []
    for i, current in enumerate(boxes):
        if dropped[i]:
            continue
        survivors.append(current)
        for j in range(i + 1, len(boxes)):
            other = boxes[j]
            # skip already-dropped boxes and boxes of a different class
            if dropped[j] or current[5] != other[5]:
                continue
            if iou(current, other) > iou_thres:
                dropped[j] = True
    return survivors

def postprocess(pred, conf_thres=0.25, iou_thres=0.45):
    """Turn the raw 1x8400x116 head output into NMS-filtered detections.

    Each prediction row is [cx, cy, w, h, 80 class scores, 32 mask coeffs].
    Output rows are [x1, y1, x2, y2, conf, label, 32 mask coeffs].
    """
    detections = []
    for row in pred[0]:
        # argmax over the class scores sandwiched between box and coeffs
        label = row[4:-32].argmax()
        score = row[4 + label]
        if score < conf_thres:
            continue
        cx, cy, w, h = row[:4]
        # center/size -> corner coordinates
        detections.append([cx - w * 0.5, cy - h * 0.5,
                           cx + w * 0.5, cy + h * 0.5,
                           score, label, *row[-32:]])

    detections.sort(key=lambda d: d[4], reverse=True)
    return NMS(detections, iou_thres)

# boxes -> n, 4         检测框，映射到 160x160 尺寸下的
x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(n,1,1)
r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # rows shape(1,1,w)
c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # cols shape(1,h,1)

return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))

# protos   -> 32, 160, 160 分割头输出
# bboxes   -> n, 4         检测框
# shape    -> 640, 640     输入网络中的图像 shape
# upsample 一个 bool 值，表示是否需要上采样 masks 到图像的原始形状
c, mh, mw = protos.shape  # CHW
ih, iw = shape
# 矩阵相乘 nx32 @ 32x(160x160) -> nx(160x160) -> sigmoid -> nx160x160

downsampled_bboxes = bboxes.clone()
downsampled_bboxes[:, 0] *= mw / iw
downsampled_bboxes[:, 2] *= mw / iw
downsampled_bboxes[:, 3] *= mh / ih
downsampled_bboxes[:, 1] *= mh / ih

if upsample:

def hsv2bgr(h, s, v):
    """Convert HSV (components in [0, 1]) to an integer BGR tuple in [0, 255]."""
    sector = int(h * 6)
    f = h * 6 - sector
    p = v * (1 - s)
    q = v * (1 - f * s)
    t = v * (1 - (1 - f) * s)

    # one (r, g, b) triple per 60-degree hue sector; out-of-range sectors
    # stay black, exactly as the original if/elif chain behaved
    table = ((v, t, p), (q, v, p), (p, v, t), (p, q, v), (t, p, v), (v, p, q))
    r, g, b = table[sector] if 0 <= sector <= 5 else (0, 0, 0)
    return int(b * 255), int(g * 255), int(r * 255)

def random_color(id):
    """Map an integer id to a stable pseudo-random BGR color."""
    def scramble(value, magic):
        # XOR with a magic constant, folded into [0.0, 1.0)
        return (value ^ magic) % 100 / 100.0

    # hue and saturation come from differently-shifted scrambles of the id;
    # value is fixed at full brightness
    return hsv2bgr(scramble(id << 2, 0x937151), scramble(id << 3, 0x315793), 1)

if __name__ == "__main__":

    # NOTE(review): `img` is used below but never defined in this excerpt;
    # presumably `img = cv2.imread(...)` was dropped during extraction --
    # confirm against the original article/repo.
    # img_pre = preprocess_letterbox(img)
    img_pre, IM = preprocess_warpAffine(img)

    model  = AutoBackend(weights="yolov8s-seg.pt")
    names  = model.names
    result = model(img_pre)
    """
    result[0] -> 1, 116, 8400 -> det head
    result[1][0][0] -> 1, 144, 80, 80
    result[1][0][1] -> 1, 144, 40, 40
    result[1][0][2] -> 1, 144, 20, 20
    result[1][1] -> 1, 32, 8400
    result[1][2] -> 1, 32, 160, 160 -> seg head
    """

    output0 = result[0].transpose(-1, -2) # 1,8400,116 detection head output
    output1 = result[1][2][0]             # 32,160,160 segmentation head output (prototype masks)

    # Confidence filter + class-wise NMS on the raw predictions.
    pred = postprocess(output0)
    pred = torch.from_numpy(np.array(pred).reshape(-1, 38))

    # pred -> nx38 = [cx,cy,w,h,conf,label,32]
    # NOTE(review): the comment above says cx,cy,w,h, but postprocess()
    # already converted to corner form, so rows are actually
    # [x1,y1,x2,y2,conf,label,32 coeffs].

    # Map box corners from 640x640 network space back to the original image
    # via the inverse affine transform IM (pure scale + translation).
    boxes = np.array(pred[:,:6])
    lr = boxes[:, [0, 2]]
    tb = boxes[:,[1, 3]]
    boxes[:,[0, 2]] = IM[0][0] * lr + IM[0][2]
    boxes[:,[1, 3]] = IM[1][1] * tb + IM[1][2]

    h, w = img.shape[:2]

    # NOTE(review): `i` is undefined here -- these lines appear to be
    # remnants of the per-instance mask loop removed from this excerpt.
    label = int(boxes[i][5])
    color = np.array(random_color(label))

    # Solid-color canvas used to overlay/blend an instance mask.
    colored_mask = (np.ones((h, w, 3)) * color).astype(np.uint8)

    # contours, _ = cv2.findContours(mask_resized, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # cv2.drawContours(img, contours, -1, random_color(label), 2)

    # draw box
    for obj in boxes:
        left, top, right, bottom = int(obj[0]), int(obj[1]), int(obj[2]), int(obj[3])
        confidence = obj[4]
        label = int(obj[5])
        color = random_color(label)
        cv2.rectangle(img, (left, top), (right, bottom), color = color ,thickness=2, lineType=cv2.LINE_AA)
        caption = f"{names[label]} {confidence:.2f}"
        # NOTE(review): rebinds the image-size `w, h` above with the
        # caption's text size -- harmless only because they are not reused
        # after the loop.
        w, h = cv2.getTextSize(caption, 0, 1, 2)[0]
        cv2.rectangle(img, (left - 3, top - 33), (left + w + 10, top), color, -1)
        cv2.putText(img, caption, (left, top - 5), 0, 1, (0, 0, 0), 2, 16)

    cv2.imwrite("infer-seg.jpg", img)
    print("save done")


### 二、YOLOv8-Seg推理(C++)

C++ 上的实现我们使用的 repo 依旧是 tensorRT_Pro，现在我们就基于 tensorRT_Pro 完成 YOLOv8-Seg 在 C++ 上的推理。

#### 1. ONNX导出

• 输入输出只让 batch 维度动态，宽高不动态
• 增加 transpose 节点交换输出的 2、3 维度

1. 在 ultralytics/engine/exporter.py 文件中改动一处

• 326 行：输入只让 batch 维度动态，宽高不动态
• 328/329 行：输出只让 batch 动态，宽高不动态
# ========== exporter.py ==========

# ultralytics/engine/exporter.py第323行
# output_names = ['output0', 'output1'] if isinstance(self.model, SegmentationModel) else ['output0']
# dynamic = self.args.dynamic
# if dynamic:
#     dynamic = {'images': {0: 'batch', 2: 'height', 3: 'width'}}  # shape(1,3,640,640)
#     if isinstance(self.model, SegmentationModel):
#         dynamic['output0'] = {0: 'batch', 2: 'anchors'}  # shape(1, 116, 8400)
#     elif isinstance(self.model, DetectionModel):
#         dynamic['output0'] = {0: 'batch', 2: 'anchors'}  # shape(1, 84, 8400)
# 修改为：

output_names = ['output0', 'output1'] if isinstance(self.model, SegmentationModel) else ['output0']
dynamic = self.args.dynamic
if dynamic:
dynamic = {'images': {0: 'batch'}}  # shape(1,3,640,640)
if isinstance(self.model, SegmentationModel):
dynamic['output0'] = {0: 'batch'}  # shape(1, 116, 8400)
dynamic['output1'] = {0: 'batch'}  # shape(1,32,160,160)
elif isinstance(self.model, DetectionModel):
dynamic['output0'] = {0: 'batch', 2: 'anchors'}  # shape(1, 84, 8400)


• 106 行：添加 transpose 节点交换检测头输出的第 2 和 第 3 维度
# ========== head.py ==========

# return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
# 修改为：

return (torch.cat([x, mc], 1).permute(0, 2, 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))


from ultralytics import YOLO

# Load the segmentation checkpoint (with the exporter/head patches applied
# above) and export to ONNX; dynamic=True makes the batch dimension dynamic
# per the modified exporter.py, and simplify=True runs onnx-simplifier on
# the exported graph.
model = YOLO("yolov8s-seg.pt")

success = model.export(format="onnx", dynamic=True, simplify=True)


python export.py


#### 2. YOLOv8-Seg预处理

tensorRT_Pro 中预处理的代码如下：

// Fused GPU preprocessing kernel: inverse-affine warp with bilinear sampling
// plus normalization, writing the result in planar (CHW) float layout.
// One thread handles one destination pixel; edge = dst_width * dst_height.
__global__ void warp_affine_bilinear_and_normalize_plane_kernel(uint8_t* src, int src_line_size, int src_width, int src_height, float* dst, int dst_width, int dst_height,
    uint8_t const_value_st, float* warp_affine_matrix_2_3, Norm norm, int edge){

    int position = blockDim.x * blockIdx.x + threadIdx.x;
    if (position >= edge) return;

    // 2x3 inverse affine matrix (dst -> src), row major.
    float m_x1 = warp_affine_matrix_2_3[0];
    float m_y1 = warp_affine_matrix_2_3[1];
    float m_z1 = warp_affine_matrix_2_3[2];
    float m_x2 = warp_affine_matrix_2_3[3];
    float m_y2 = warp_affine_matrix_2_3[4];
    float m_z2 = warp_affine_matrix_2_3[5];

    // Destination pixel coordinate and the source-space point it maps from.
    int dx      = position % dst_width;
    int dy      = position / dst_width;
    float src_x = m_x1 * dx + m_y1 * dy + m_z1;
    float src_y = m_x2 * dx + m_y2 * dy + m_z2;
    float c0, c1, c2;

    if(src_x <= -1 || src_x >= src_width || src_y <= -1 || src_y >= src_height){
        // out of range: fill with the constant border value (e.g. 114)
        c0 = const_value_st;
        c1 = const_value_st;
        c2 = const_value_st;
    }else{
        // Bilinear interpolation between the 4 neighbouring source pixels.
        int y_low = floorf(src_y);
        int x_low = floorf(src_x);
        int y_high = y_low + 1;
        int x_high = x_low + 1;

        uint8_t const_value[] = {const_value_st, const_value_st, const_value_st};
        float ly    = src_y - y_low;
        float lx    = src_x - x_low;
        float hy    = 1 - ly;
        float hx    = 1 - lx;
        float w1    = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
        // Neighbour pointers default to the border color and are replaced
        // only when the corresponding source pixel is in bounds.
        uint8_t* v1 = const_value;
        uint8_t* v2 = const_value;
        uint8_t* v3 = const_value;
        uint8_t* v4 = const_value;
        if(y_low >= 0){
            if (x_low >= 0)
                v1 = src + y_low * src_line_size + x_low * 3;

            if (x_high < src_width)
                v2 = src + y_low * src_line_size + x_high * 3;
        }

        if(y_high < src_height){
            if (x_low >= 0)
                v3 = src + y_high * src_line_size + x_low * 3;

            if (x_high < src_width)
                v4 = src + y_high * src_line_size + x_high * 3;
        }

        // same to opencv: +0.5 then floor rounds to the nearest integer
        c0 = floorf(w1 * v1[0] + w2 * v2[0] + w3 * v3[0] + w4 * v4[0] + 0.5f);
        c1 = floorf(w1 * v1[1] + w2 * v2[1] + w3 * v3[1] + w4 * v4[1] + 0.5f);
        c2 = floorf(w1 * v1[2] + w2 * v2[2] + w3 * v3[2] + w4 * v4[2] + 0.5f);
    }

    // Swap channels 0 and 2 (e.g. BGR -> RGB) when requested.
    if(norm.channel_type == ChannelType::Invert){
        float t = c2;
        c2 = c0;  c0 = t;
    }

    // Normalization: either (x*alpha - mean)/std or x*alpha + beta.
    if(norm.type == NormType::MeanStd){
        c0 = (c0 * norm.alpha - norm.mean[0]) / norm.std[0];
        c1 = (c1 * norm.alpha - norm.mean[1]) / norm.std[1];
        c2 = (c2 * norm.alpha - norm.mean[2]) / norm.std[2];
    }else if(norm.type == NormType::AlphaBeta){
        c0 = c0 * norm.alpha + norm.beta;
        c1 = c1 * norm.alpha + norm.beta;
        c2 = c2 * norm.alpha + norm.beta;
    }

    // Planar (CHW) output: three consecutive dst_width*dst_height planes.
    int area = dst_width * dst_height;
    float* pdst_c0 = dst + dy * dst_width + dx;
    float* pdst_c1 = pdst_c0 + area;
    float* pdst_c2 = pdst_c1 + area;
    *pdst_c0 = c0;
    *pdst_c1 = c1;
    *pdst_c2 = c2;
}


#### 3. YOLOv8-Seg后处理

infer 框架中有关于 YOLOv8-Seg 模型的后处理，因此我们直接 copy 过来即可，它包括检测框的后处理和 mask 的后处理，我们先来看检测框的后处理，代码可参考：yolo.cu#L129

// Decode one YOLOv8-Seg candidate per thread: argmax the class scores,
// threshold on confidence, map the box to image space through the inverse
// affine matrix, and append it to the shared output array.
// parray layout: [count, box0..., box1..., ...] with NUM_BOX_ELEMENT floats
// per box; parray[0] is the running box count.
static __global__ void decode_kernel_v8_Seg(float *predict, int num_bboxes, int num_classes, float confidence_threshold, float* invert_affine_matrix, float* parray, int MAX_IMAGE_BOXES){

    int position = blockDim.x * blockIdx.x + threadIdx.x;
    if (position >= num_bboxes) return;

    // One row = [cx, cy, w, h, num_classes scores, 32 mask coefficients].
    float* pitem            = predict + (4 + num_classes + 32) * position;
    float* class_confidence = pitem + 4;
    float confidence        = *class_confidence++;
    int label               = 0;
    // argmax over the class scores
    for(int i = 1; i < num_classes; ++i, ++class_confidence){
        if(*class_confidence > confidence){
            confidence = *class_confidence;
            label      = i;
        }
    }

    if(confidence < confidence_threshold)
        return;

    // FIX: `index` was used without ever being defined in the original
    // excerpt. Restored the atomic output-slot reservation from the upstream
    // implementation (shouxieai/infer, yolo.cu): each surviving box bumps
    // the count at parray[0] and claims its slot.
    int index = atomicAdd(parray, 1);
    if(index >= MAX_IMAGE_BOXES)
        return;

    // cxcywh -> xyxy, then project from network space to image space.
    float cx         = *pitem++;
    float cy         = *pitem++;
    float width      = *pitem++;
    float height     = *pitem++;
    float left   = cx - width  * 0.5f;
    float top    = cy - height * 0.5f;
    float right  = cx + width  * 0.5f;
    float bottom = cy + height * 0.5f;
    affine_project(invert_affine_matrix, left,  top,    &left,  &top);
    affine_project(invert_affine_matrix, right, bottom, &right, &bottom);

    float *pout_item = parray + 1 + index * NUM_BOX_ELEMENT;
    *pout_item++ = left;
    *pout_item++ = top;
    *pout_item++ = right;
    *pout_item++ = bottom;
    *pout_item++ = confidence;
    *pout_item++ = label;
    *pout_item++ = 1;  // 1 = keep, 0 = ignore (cleared later by NMS)
    *pout_item++ = position;  // row_index into predict, used to fetch the mask coefficients later
}


Box result_object_box(pbox[0], pbox[1], pbox[2], pbox[3], pbox[4], pbox[5]);
// reference: https://github.com/shouxieai/infer/blob/main/src/yolo.cu#L629
int row_index = pbox[7];

float left, top, right, bottom;
affine_project(i2d, pbox[0], pbox[1], &left,  &top);
affine_project(i2d, pbox[2], pbox[3], &right, &bottom);

float box_width          = right - left;
float box_height         = bottom - top;
int mask_out_width       = box_width  * scale_to_predict_x + 0.5f;
int mask_out_height      = box_height * scale_to_predict_y + 0.5f;

result_object_box.seg->left = left * scale_to_predict_x;
result_object_box.seg->top  = top  * scale_to_predict_y;
image_based_boxes.emplace_back(result_object_box);
}


// Compute one pixel of a single instance mask per thread: dot the 32 mask
// coefficients (mask_weights) with the prototype masks (mask_predict,
// mask_dim x mask_height x mask_width planes), apply sigmoid, and write the
// result as a 0..255 byte. (left, top) offsets the output crop inside the
// prototype plane.
static __global__ void decode_single_mask_kernel(int left, int top, float *mask_weights, float *mask_predict, int mask_width, int mask_height, unsigned char *mask_out, int mask_dim, int out_width, int out_height) {

    int dx = blockDim.x * blockIdx.x + threadIdx.x;
    int dy = blockDim.y * blockIdx.y + threadIdx.y;
    if (dx >= out_width || dy >= out_height) return;

    // Source coordinate inside the prototype plane.
    int sx = left + dx;
    int sy = top + dy;
    if (sx < 0 || sx >= mask_width || sy < 0 || sy >= mask_height) {
        mask_out[dy * out_width + dx] = 0;  // outside the prototype: background
        return;
    }

    // FIX: `cval` and `wval` were referenced but never defined in the
    // original excerpt. Restored the per-channel loads from the upstream
    // implementation (shouxieai/infer, yolo.cu): prototype value at this
    // pixel times the instance's coefficient, accumulated over mask_dim.
    float cumprod = 0;
    for (int ic = 0; ic < mask_dim; ++ic) {
        float cval = mask_predict[(ic * mask_height + sy) * mask_width + sx];
        float wval = mask_weights[ic];
        cumprod += cval * wval;
    }

    float alpha = 1.0f / (1.0f + exp(-cumprod));  // sigmoid
    mask_out[dy * out_width + dx] = alpha * 255;
}


#### 4. YOLOv8-Seg推理

make yolo_seg


### 三、YOLOv8-Seg部署

#### 1. 源码下载

tensorRT_Pro-YOLOv8 的代码可以直接从 GitHub 官网上下载，源码下载地址是 https://github.com/Melody-Zhou/tensorRT_Pro-YOLOv8，Linux 下代码克隆指令如下：

git clone https://github.com/Melody-Zhou/tensorRT_Pro-YOLOv8.git


#### 2. 环境配置

tensorRT_Pro-YOLOv8 提供 CMakeLists.txt 和 Makefile 两种方式编译，二者选一即可

##### 2.1 配置CMakeLists.txt

1. 修改第 13 行，修改 OpenCV 路径

set(OpenCV_DIR   "/usr/local/include/opencv4")


2. 修改第 15 行，修改 CUDA 路径

set(CUDA_TOOLKIT_ROOT_DIR     "/usr/local/cuda-11.6")


3. 修改第 16 行，修改 cuDNN 路径

set(CUDNN_DIR    "/usr/local/cudnn8.4.0.27-cuda11.6")


4. 修改第 17 行，修改 tensorRT 路径

set(TENSORRT_DIR "/opt/TensorRT-8.4.1.5")


5. 修改第 20 行，修改 protobuf 路径

set(PROTOBUF_DIR "/home/jarvis/protobuf")

##### 2.2 配置Makefile

1. 修改第 4 行，修改 protobuf 路径

lean_protobuf  := /home/jarvis/protobuf


2. 修改第 5 行，修改 tensorRT 路径

lean_tensor_rt := /opt/TensorRT-8.4.1.5


3. 修改第 6 行，修改 cuDNN 路径

lean_cudnn     := /usr/local/cudnn8.4.0.27-cuda11.6


4. 修改第 7 行，修改 OpenCV 路径

lean_opencv    := /usr/local


5. 修改第 8 行，修改 CUDA 路径

lean_cuda      := /usr/local/cuda-11.6


#### 4. 源码修改

• 1. app_yolo_seg.cpp 329行，“yolov8s-seg” 修改为你导出的 ONNX 模型名
• 2. app_yolo_seg.cpp 10行，将 cocolabels 数组中的类别名称修改为你训练的类别

test(TRT::Model::FP32, "best")	// 修改1 329行"yolov8s-seg"改成"best"



OK！源码修改好了，Makefile 编译文件也搞定了，ONNX 模型也准备好了，现在可以编译运行了，直接在终端执行如下指令即可：

make yolo_seg
`

OK！以上就是使用 tensorRT_Pro-YOLOv8 推理 YOLOv8-Seg 的大致流程，若有问题，欢迎各位看官批评指正。

• 22
点赞
• 90
收藏
觉得还不错? 一键收藏
• 打赏
• 58
评论
11-06
07-02 1万+
04-27 1913
11-08 3278
11-29 1605
06-29 4736
07-06 2903
11-14 1516
01-04 681
09-07
03-27
03-27
04-28
09-07

### “相关推荐”对你有帮助么？

• 非常没帮助
• 没帮助
• 一般
• 有帮助
• 非常有帮助

¥1 ¥2 ¥4 ¥6 ¥10 ¥20

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。