目标
使用yoloV3 darknet 自带的 python 接口(即darknet.py 文件)处理图片和视频。
具体的说有三种场景:
1 指定一张图片的位置,进行model预测+画框+另存为新图片;
2 指定一个视频的位置,进行抽帧+model预测+画框+另存为新帧+新帧拼成新视频保存;
3 无需指定图片位置,直接处理视频帧(在内存中/从摄像头读取),进行model预测+画框;
在场景3中,视频帧获取可以是 cv2.imread() / cv2.VideoCapture() 等方法读图片/读视频/调摄像头来的,它没有使用darknet.py 定义的 IMAGE object (这个object必须要传入文件路径)。
project 下载
git clone https://github.com/pjreddie/darknet
cd darknet
#make之前要改一些配置 ,具体操作见 我的上一份博客的结尾部分
#(https://blog.csdn.net/qq_20241587/article/details/111176541)
make
#下载模型文件,大概200+M ,复制到 darknet 文件夹下
wget https://pjreddie.com/media/files/yolov3.weights
./darknet detect cfg/yolov3.cfg yolov3.weights data/dog.jpg #默认使用第1块GPU
./darknet -nogpu detect cfg/yolov3.cfg yolov3.weights data/dog.jpg #-nogpu:不使用GPU
#在darknet文件夹下的predictions.jpg即是检测后的结果
处理单张图片
- 确保darknet环境已经make OK 并测试OK了 。细节可参考我上一份博客
- 新建空白 XX.py文件,拷贝本小节的代码,自行更改其中图片文件路径(在最后几行)
- 执行XX.py
这小节实现了一个目标,即:指定一张图片的路径,指定检测生成的新图片的存放路径,进行 model检测+画框+另存为新图片.
下图中,左边是处理前的样子,右边是处理后的样子
本小节的代码:
from ctypes import *
import math
import random
import cv2
import os
def sample(probs):
    """Draw a random index from *probs*, weighted proportionally.

    The weights need not be normalized; they are divided by their sum
    before sampling.  Returns the last index as a fallback against
    floating-point round-off.
    """
    total = sum(probs)
    threshold = random.uniform(0, 1)
    for idx, weight in enumerate(probs):
        threshold -= weight / total
        if threshold <= 0:
            return idx
    return len(probs) - 1
def c_array(ctype, values):
    """Allocate a ctypes array of *ctype* and copy *values* into it."""
    buf = (ctype * len(values))()
    for i, v in enumerate(values):
        buf[i] = v
    return buf
class BOX(Structure):
    """Mirror of darknet's C `box` struct: a detection bounding box.

    Field order and types form the ABI contract with libdarknet — do not
    reorder.  Coordinates are floats; NOTE(review): darknet conventionally
    uses center-x/center-y plus width/height — confirm before drawing.
    """
    _fields_ = [("x", c_float),
                ("y", c_float),
                ("w", c_float),
                ("h", c_float)]
class DETECTION(Structure):
    """Mirror of darknet's C `detection` struct (one candidate detection).

    Field order and types form the ABI contract with libdarknet — do not
    reorder.  `prob` points to a per-class probability array of length
    `classes`; `mask` is an optional float array; `sort_class` is used by
    the C-side NMS sorting.
    """
    _fields_ = [("bbox", BOX),
                ("classes", c_int),
                ("prob", POINTER(c_float)),
                ("mask", POINTER(c_float)),
                ("objectness", c_float),
                ("sort_class", c_int)]
class IMAGE(Structure):
    """Mirror of darknet's C `image` struct.

    Field order and types form the ABI contract with libdarknet — do not
    reorder.  `w`/`h`/`c` are width, height and channel count; `data`
    points to the float pixel buffer owned by the C side (release it with
    `free_image`, not Python).
    """
    _fields_ = [("w", c_int),
                ("h", c_int),
                ("c", c_int),
                ("data", POINTER(c_float))]
class METADATA(Structure):
    """Mirror of darknet's C `metadata` struct: class count plus an array
    of C strings with the class names.  Field order is the ABI contract
    with libdarknet — do not reorder.
    """
    _fields_ = [("classes", c_int),
                ("names", POINTER(c_char_p))]
# Load the compiled darknet shared library.  RTLD_GLOBAL exposes its symbols
# to any libraries loaded afterwards (same as upstream darknet.py).
# NOTE(review): the absolute path below is machine-specific — change it to
# your own checkout, or use the relative form on the next line when running
# from inside the darknet directory.
lib = CDLL("/home/jiantang/桌面/enn/workcode/yoloV3/github/darknet/libdarknet.so", RTLD_GLOBAL)
# lib = CDLL("libdarknet.so", RTLD_GLOBAL)

# --- ctypes signatures for the darknet C API --------------------------------
# Declaring argtypes/restype is essential: without it ctypes assumes every
# return value is a C int, which truncates pointers on 64-bit systems.

# network_width(net) / network_height(net): input resolution the net expects.
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int

# network_predict(net, float*): raw forward pass on a prepared input buffer.
predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)

# cuda_set_device(gpu_index): select which GPU darknet uses.
set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]

# make_image(w, h, c): allocate an empty darknet IMAGE.
make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE

# get_network_boxes(net, w, h, thresh, hier, map, relative, num*):
# collect raw detections after a forward pass; writes the count into num.
get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)]
get_network_boxes.restype = POINTER(DETECTION)

make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)

# free_detections / free_ptrs: release C-allocated results — call these or leak.
free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]
free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]

network_predict = lib.network_predict
network_predict.argtypes = [c_void_p, POINTER(c_float)]

# reset_rnn(net): clear recurrent state (only relevant for RNN models).
reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]

# load_network(cfg_path, weights_path, clear): build the net; returns an
# opaque handle passed to every other call.
load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p

# Non-maximum suppression variants (in-place on the detection array).
do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

free_image = lib.free_image
free_image.argtypes = [IMAGE]

# letterbox_image(im, w, h): resize with padding, preserving aspect ratio.
letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE

# get_metadata(data_cfg_path): class count + names (e.g. cfg/coco.data).
load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA

# load_image_color(path, w, h): read an image file from disk into an IMAGE
# (0 for w/h keeps the original size).
load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE

# rgbgr_image(im): swap R and B channels in place (OpenCV is BGR).
rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]

# network_predict_image(net, im): forward pass directly on an IMAGE.
predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_im