先将权重和模型打包在一起,便于独立使用,打包代码如下:
import torch
from thop import profile, clever_format
from models.common import DetectMultiBackend
# Dummy network input used only to trace the model for profiling. Zeros are
# used instead of torch.empty so the forward pass never sees uninitialized
# memory; the name avoids shadowing the builtin `input`.
dummy_input = torch.zeros(size=(1, 3, 640, 640), dtype=torch.float32, device="cpu")

# Checkpoint to wrap (machine-specific absolute path).
weights = r"E:\Python_C++_Demo\yolov5-master\python部署\weights\seg\yolov5s-seg.pt"

# Build the model wrapper from the checkpoint.
model = DetectMultiBackend(weights=weights)

# Pickle the whole model object (architecture + weights) into one file so the
# inference script can load it without rebuilding the network.
torch.save(model, "yolov5s_seg_save.pt")

# Reload the file that was just written to check that it round-trips.
# NOTE(security): torch.load unpickles arbitrary Python objects; only load
# files produced by this script or another trusted source.
# NOTE(review): recent PyTorch releases may require weights_only=False to load
# a fully pickled model — confirm against the installed version.
model = torch.load("yolov5s_seg_save.pt")

# Model compute cost and parameter count.
# NOTE(review): thop appears to document the first value as MACs — confirm
# before reporting it as FLOPs.
flops, parameters = profile(model, inputs=(dummy_input,))
print("flops:", flops, "parameters:", parameters)

# Convert the raw counts into human-readable strings.
flops, parameters = clever_format([flops, parameters], "%.3f")

# Print the formatted result.
print(f"Model FLOPs: {flops}")
print(f"Model Params: {parameters}")
模型推理代码如下:
import cv2
import torch
from utils.general import non_max_suppression, scale_boxes
# from utils.segment.general import process_mask
from utils.augmentations import letterbox
import torch.nn.functional as F
import numpy as np
import yaml
def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Assemble per-detection masks from the prototypes, cropping before upsampling.

    Args:
        protos: prototype masks, [mask_dim, mask_h, mask_w]
        masks_in: mask coefficients, [n, mask_dim], n is the number of detections after NMS
        bboxes: boxes as (x1, y1, x2, y2) in input-image pixels, [n, 4]
        shape: input image size, (h, w)
        upsample: when True, resize the masks to `shape` with bilinear interpolation

    Returns:
        Thresholded masks (values 0 or 1), [n, mask_h, mask_w], or [n, h, w]
        when `upsample` is True.
    """
    mask_dim, proto_h, proto_w = protos.shape  # CHW
    input_h, input_w = shape

    # Each mask is a linear combination of the prototypes followed by a sigmoid.
    flat_protos = protos.float().view(mask_dim, -1)
    masks = (masks_in @ flat_protos).sigmoid().view(-1, proto_h, proto_w)

    # Boxes arrive in input-image pixels; rescale a copy to prototype resolution.
    x_scale = proto_w / input_w
    y_scale = proto_h / input_h
    proto_boxes = bboxes.clone()
    proto_boxes[:, 0] *= x_scale
    proto_boxes[:, 1] *= y_scale
    proto_boxes[:, 2] *= x_scale
    proto_boxes[:, 3] *= y_scale

    # Zero everything that falls outside each detection's own box.
    masks = crop_mask(masks, proto_boxes)

    if upsample:
        # interpolate expects a batch dimension: add one, resize, drop it again.
        resized = F.interpolate(masks.unsqueeze(0), size=shape, mode="bilinear", align_corners=False)
        masks = resized[0]

    # In-place threshold at 0.5.
    return masks.gt_(0.5)
def crop_mask(masks, boxes):
    """
    Keep each mask only inside its own box; every other pixel becomes zero.

    Args:
        masks: [n, h, w] tensor of masks
        boxes: [n, 4] tensor of (x1, y1, x2, y2) in the masks' pixel coordinates

    Returns:
        [n, h, w] tensor: `masks` multiplied by a per-box window that is true
        where x1 <= x < x2 and y1 <= y < y2.
    """
    _, height, width = masks.shape

    # Split the box columns; each piece has shape (n, 1, 1) so it broadcasts
    # against the coordinate grids below.
    left, top, right, bottom = boxes.unsqueeze(2).chunk(4, dim=1)

    # x coordinate of every column, shape (1, 1, w); y coordinate of every row, shape (1, h, 1).
    xs = torch.arange(width, device=masks.device, dtype=left.dtype).view(1, 1, -1)
    ys = torch.arange(height, device=masks.device, dtype=left.dtype).view(1, -1, 1)

    # Boolean window per box, shape (n, h, w).
    inside = (xs >= left) & (xs < right) & (ys >= top) & (ys < bottom)
    return masks * inside
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Class names are read from the "names" entry of the dataset YAML
# (machine-specific absolute path).
with open(r"E:\Python_C++_Demo\yolov5-master\data\coco.yaml", "r", encoding="utf-8") as file:
    config = yaml.safe_load(file)
classes = config["names"]

# Fixed drawing palette indexed by class id. It holds six entries; a random
# palette with one colour per class is the alternative.
color = [
    (0, 255, 0),
    (114, 114, 114),
    (0, 144, 144),
    (114, 114, 0),
    (114, 0, 114),
    (255, 0, 0),
]
def detect(img_path, weights):
    """
    Run the packaged segmentation model on one image and draw the result.

    Args:
        img_path: path of the image, read with OpenCV.
        weights: path of the whole-model file written with torch.save.

    Returns:
        The image at its original size with the masks blended in and a
        "cls ... conf ..." label per detection. When nothing is detected the
        image is returned unchanged.

    Raises:
        FileNotFoundError: if OpenCV cannot read `img_path`.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"cannot read image: {img_path}")
    h, w = img.shape[:2]

    # Letterbox to the network input size, then HWC/BGR uint8 -> NCHW/RGB float in [0, 1].
    im, ratio, (dw, dh) = letterbox(img)
    im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    im = np.ascontiguousarray(im)  # the reversed view is not contiguous
    im = torch.from_numpy(im).to(device).float()  # for fp16 use im.half() together with model.half()
    im /= 255
    im = im.unsqueeze(0)  # CHW to NCHW

    # NOTE(security): torch.load unpickles arbitrary Python objects; only load
    # model files from a trusted source.
    # map_location lets a file saved on a GPU machine load on a CPU-only one.
    model = torch.load(weights, map_location=device).to(device)
    model.eval()

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        pred, proto = model(im)[:2]
        pred = non_max_suppression(pred, nm=32)  # rows: x1, y1, x2, y2, conf, cls, 32 mask coefficients

    # Offsets that letterbox added on the left/top of the resized image.
    # NOTE(review): assumes letterbox returns (w_ratio, h_ratio) and per-side
    # padding (dw, dh), rounded this way internally — confirm against utils.augmentations.
    pad_left = int(round(dw - 0.1))
    pad_top = int(round(dh - 0.1))

    overlay = np.zeros_like(img)  # coloured masks, same size as the original image
    found = False
    for batch_idx, det in enumerate(pred):
        if not len(det):
            continue
        found = True

        # Masks at network-input resolution, shape (n, H, W).
        masks = process_mask(proto[batch_idx], det[:, 6:], det[:, :4], im.shape[2:], upsample=True)
        # Rescale boxes from the network input size to the original image size.
        det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], img.shape).round()
        mask_imgs = (masks > 0).cpu().numpy().astype(np.uint8) * 255

        for det_idx, tar in enumerate(det.cpu().numpy()):
            cls_id = int(tar[5])
            print("cls:", cls_id, "conf:", round(tar[4], 2))
            # The palette is shorter than the class list; wrap around rather
            # than raising IndexError for higher class ids.
            draw_color = color[cls_id % len(color)]

            contours, _ = cv2.findContours(mask_imgs[det_idx], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            new_contours = []
            for contour in contours:
                # Undo the letterbox (remove padding, then undo the resize) so
                # the contour lines up with the original image.
                pts = contour.astype(np.float32)
                pts[:, :, 0] = ((pts[:, :, 0] - pad_left) / ratio[0]).clip(0, w - 1)
                pts[:, :, 1] = ((pts[:, :, 1] - pad_top) / ratio[1]).clip(0, h - 1)
                new_contours.append(np.round(pts).astype(np.int32))
            if new_contours:
                cv2.fillPoly(overlay, new_contours, draw_color)

            # Keep the label inside the image when the box touches the top edge.
            label_pos = (int(tar[0]), max(int(tar[1]) - 10, 15))
            cv2.putText(img, f"cls: {classes[cls_id]} conf: {tar[4]:.2f}", label_pos,
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, draw_color, 2)

    if found:
        # Blend the coloured masks over the annotated image once.
        img = cv2.addWeighted(img, 0.7, overlay, 0.5, 0)
    return img
if __name__ == '__main__':
    # Demo: run detection on the sample image, save the result and show it.
    img_path = r"E:\Python_C++_Demo\yolov5-master\data\images\bus.jpg"
    weights = "yolov5s_seg_save.pt"
    res_img = detect(img_path, weights)
    # cv2.imwrite signals failure through its return value rather than raising.
    if not cv2.imwrite("res_img.jpg", res_img):
        print("failed to write res_img.jpg")
    cv2.imshow("img", res_img)
    cv2.waitKey(0)  # block until a key is pressed
    cv2.destroyAllWindows()  # release the preview window
使用官方图片:bus.jpg,推理结果如下:
总结:与pred同时输出的seg分支的proto,其后处理部分需要仔细理解一下;而pred的输出以及后处理和detect基本完全一样。
yolo的实例分割效果看起来并不是很好;当然,coco数据集的数据标注也并非挑不出毛病,因此不排除效果差的原因来自于数据。