目的
使用yoloV4 darknet 自带的 python 接口处理图片和视频。
具体地说有三种场景:
1 指定一张图片的路径,或使用 cv2.imread() 的结果,进行 model 预测+画框+另存为新图片;
2 指定一个视频的路径,进行抽帧+model 预测+画框+另存为新帧+新帧拼成新视频保存;
3 启动一个 Flask 服务,对外提供场景 1 的功能
实现
作者提供的darknet.py有些复杂,我觉得不太好用,于是提供一个darknet_me.py实现上述功能1
'''
darknet.py 核心函数:load_network、detect_image draw_boxes bbox2points
darknet_images.py 核心函数: image_detection,此函数需要修改成输入图像
darknet 官方代码输出的检测框坐标基于缩放后的正方形网络输入(如 608*608),而非原图尺寸!因此需要把坐标换算回原图
'''
import os
import cv2
import numpy as np
import darknet
import time
class Detect:
    """Wrapper around the darknet Python API for single-image detection.

    Loads a YOLO network once in ``__init__`` and exposes
    ``image_detection`` to run inference on a BGR image (as produced by
    ``cv2.imread``), rescale the predicted boxes from the network's square
    input size back to the original image size, and draw them.
    """

    def __init__(self, metaPath, configPath, weightPath, gpu_id=2, batch=1):
        """
        :param metaPath: ***.data file holding dataset/meta parameters
        :param configPath: ***.cfg network-structure file
        :param weightPath: ***.weights YOLO weights
        :param gpu_id: index of the GPU darknet should run on
        :param batch: this class only supports batch == 1
        :raises ValueError: if batch != 1
        """
        # Raise (not assert) so the check survives `python -O`.
        if batch != 1:
            raise ValueError("batch必须为1")
        # BUG FIX: the original hard-coded set_gpu(0), silently ignoring
        # the gpu_id argument.
        darknet.set_gpu(gpu_id)
        network, class_names, class_colors = darknet.load_network(
            configPath,
            metaPath,
            weightPath,
            batch_size=batch
        )
        self.network = network
        self.class_names = class_names
        self.class_colors = class_colors

    def bbox2point(self, bbox):
        """Convert a center-format box (x, y, w, h) to corner format
        (xmin, ymin, xmax, ymax)."""
        x, y, w, h = bbox
        half_w = w / 2
        half_h = h / 2
        return (x - half_w, y - half_h, x + half_w, y + half_h)

    def point2bbox(self, point):
        """Convert a corner-format box (x1, y1, x2, y2) to center format
        (x, y, w, h)."""
        x1, y1, x2, y2 = point
        return ((x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1)

    def image_detection(self, image_bgr, network, class_names, class_colors, thresh=0.5):
        """Run detection on a BGR image and return it with boxes drawn.

        :param image_bgr: BGR image array (e.g. from cv2.imread);
                          grayscale (2-D) input is promoted to 3 channels
        :param network: network handle from darknet.load_network (kept as
                        an explicit parameter for backward compatibility;
                        self.network holds the same object)
        :param class_names: class-name list from darknet.load_network
        :param class_colors: color map from darknet.load_network
        :param thresh: detection confidence threshold
        :return: annotated image in BGR channel order
        """
        # Promote grayscale input to 3 channels so darknet accepts it.
        if len(image_bgr.shape) == 2:
            image_bgr = np.stack([image_bgr] * 3, axis=-1)
        # Original image size, needed to rescale boxes back later.
        orig_h, orig_w = image_bgr.shape[:2]
        width = darknet.network_width(network)
        height = darknet.network_height(network)
        darknet_image = darknet.make_image(width, height, 3)
        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        image_resized = cv2.resize(image_rgb, (width, height), interpolation=cv2.INTER_LINEAR)
        try:
            darknet.copy_image_from_bytes(darknet_image, image_resized.tobytes())
            detections = darknet.detect_image(network, class_names, darknet_image, thresh=thresh)
        finally:
            # Always release the C-side image buffer, even if detection fails
            # (the original leaked it on exception).
            darknet.free_image(darknet_image)
        # darknet reports boxes in network-input coordinates (width x height),
        # not original-image coordinates; rescale and clamp to the image.
        new_detections = []
        for pred_label, pred_conf, (x, y, w, h) in detections:
            scaled = (x / width * orig_w, y / height * orig_h,
                      w / width * orig_w, h / height * orig_h)
            x1, y1, x2, y2 = self.bbox2point(scaled)
            x1 = max(x1, 0)
            y1 = max(y1, 0)
            x2 = min(x2, orig_w)
            y2 = min(y2, orig_h)
            new_detections.append((pred_label, pred_conf,
                                   self.point2bbox((x1, y1, x2, y2))))
        image = darknet.draw_boxes(new_detections, image_rgb, class_colors)
        # BUG FIX: the original line ended with a trailing comma, so the
        # method returned a 1-tuple instead of the image itself.
        return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)