darknet 自带的 Python 接口用起来比原 C 代码简单得多,而且比第三方编写的各种 keras、tensorflow 版本优化得都要好。因此我帮大家整合了一个 Python 接口程序,可以在视频上任意画多边形,并检测多边形内的目标。接下来一步一步教大家怎么操作:
- 首先按照链接的教程简单修改原代码,主要是加函数:https://blog.csdn.net/phinoo/article/details/83009061
- 按照上一步修改完后,先确保程序可以正常运行,然后修改 darknet 下 python 文件夹中的代码(完整代码见下文)。
- 具体操作为:点击鼠标左键依次画点,点击鼠标右键将所画的点围成多边形,点击鼠标中键清除多边形。修改后的代码如下:
from ctypes import *
import random
import cv2
import numpy as np
import time
def sample(probs):
    """Draw a random index, weighted by the values in ``probs``.

    The weights are normalised by their sum, a uniform random number in
    [0, 1] is drawn, and the weights are subtracted from it one by one;
    the first index at which the remainder drops to zero or below is
    returned.

    Args:
        probs: sequence of non-negative weights (need not sum to 1).

    Returns:
        The selected index, or ``len(probs) - 1`` if the loop finishes
        without the remainder reaching zero (e.g. due to rounding).
    """
    # float() keeps the normalisation a true division even for int weights.
    total = float(sum(probs))
    weights = [p / total for p in probs]
    remaining = random.uniform(0, 1)
    for index, weight in enumerate(weights):
        remaining -= weight
        if remaining <= 0:
            return index
    return len(weights) - 1
def c_array(ctype, values):
    """Build a ctypes array of element type ``ctype`` holding ``values``.

    Args:
        ctype: a ctypes element type (for example ``c_float``).
        values: a sized sequence of Python values convertible to ``ctype``.

    Returns:
        A new ``ctype * len(values)`` array initialised with ``values``.
    """
    array_type = ctype * len(values)
    return array_type(*values)
class BOX(Structure):
    """ctypes structure with four float fields: x, y, w, h.

    The field order and types must match the C struct in the loaded library.
    """

    _fields_ = [
        ("x", c_float),
        ("y", c_float),
        ("w", c_float),
        ("h", c_float),
    ]
class DETECTION(Structure):
    """ctypes structure describing one detection returned by the library.

    ``prob`` is indexed by class id in ``detect``; the field order and types
    must match the C struct in the loaded library.
    """

    _fields_ = [
        ("bbox", BOX),
        ("classes", c_int),
        ("prob", POINTER(c_float)),
        ("mask", POINTER(c_float)),
        ("objectness", c_float),
        ("sort_class", c_int),
    ]
class IMAGE(Structure):
    """ctypes structure for an image: three int fields (w, h, c) and a
    pointer to float data.

    The field order and types must match the C struct in the loaded library.
    """

    _fields_ = [
        ("w", c_int),
        ("h", c_int),
        ("c", c_int),
        ("data", POINTER(c_float)),
    ]
class METADATA(Structure):
    """ctypes structure holding a class count and an array of C strings.

    ``names`` is indexed by class id in ``classify`` and ``detect``.
    """

    _fields_ = [
        ("classes", c_int),
        ("names", POINTER(c_char_p)),
    ]
# ---------------------------------------------------------------------------
# ctypes bindings for the darknet shared library.
#
# Each library function used by this script gets a Python alias plus explicit
# argtypes (and restype where the return value is not an int). ctypes assumes
# a c_int return for any function whose restype is not set here.
# The network handle is treated as an opaque pointer (c_void_p) throughout.
# ---------------------------------------------------------------------------

# Load the shared library by name, passing RTLD_GLOBAL as the load mode.
lib = CDLL("libdarknet.so", RTLD_GLOBAL)

# Network input dimensions.
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int

# Forward pass on a raw float buffer; returns a float pointer.
predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)

# NOTE(review): presumably only exported by GPU builds of the library - confirm.
set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]

# Image allocation: three ints in, IMAGE struct returned by value.
make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE

# Detection retrieval; the last argument receives the number of detections
# (see how `detect` reads it back through its pointer).
get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)]
get_network_boxes.restype = POINTER(DETECTION)

make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)

# Release helpers for buffers allocated by the library.
free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]
free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]

# Same library symbol as `predict` above, so this re-applies the same argtypes
# to the same function object.
network_predict = lib.network_predict
network_predict.argtypes = [c_void_p, POINTER(c_float)]

reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]

# Network loading: two C strings and an int; returns the opaque network handle.
load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p

# Non-maximum suppression variants; both take (detections, count, classes, threshold).
do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

# IMAGE is passed by value to these functions.
free_image = lib.free_image
free_image.argtypes = [IMAGE]
letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE

# Class metadata: takes a C string, returns a METADATA struct by value.
load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA

# Image loading from a file path (C string) plus two ints.
load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE
rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]

# Forward pass on an IMAGE; returns a float pointer (indexed per class in `classify`).
predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)

# Takes a byte pointer and two long pointers; `nparray_to_image` passes a NumPy
# array's data, shape and strides.
# NOTE(review): presumably the function added to darknet by the tutorial linked
# at the top of this file - confirm it is exported by your build.
ndarray_image = lib.ndarray_to_image
ndarray_image.argtypes = [POINTER(c_ubyte), POINTER(c_long), POINTER(c_long)]
ndarray_image.restype = IMAGE
def classify(net, meta, im):
    """Run the network on ``im`` and return the per-class scores.

    Args:
        net: network handle returned by ``load_net``.
        meta: METADATA providing ``classes`` and ``names``.
        im: IMAGE to classify.

    Returns:
        A list of ``(name, score)`` tuples, one per class in ``meta``,
        sorted by descending score.
    """
    out = predict_image(net, im)
    scored = [(meta.names[i], out[i]) for i in range(meta.classes)]
    return sorted(scored, key=lambda pair: -pair[1])
def detect(net, meta, im, thresh=.5, hier_thresh=.5, nms=.45):
    """Run detection on ``im`` and return the boxes found.

    This function takes ownership of ``im``: it calls ``free_image(im)``
    before returning, so the caller must not reuse or free it afterwards.

    Args:
        net: network handle returned by ``load_net``.
        meta: METADATA providing ``classes`` and ``names``.
        im: IMAGE to run detection on (freed by this function).
        thresh: threshold passed to ``get_network_boxes``.
        hier_thresh: hierarchical threshold passed to ``get_network_boxes``.
        nms: threshold passed to ``do_nms_obj``; a falsy value skips NMS.

    Returns:
        A list of ``(name, prob, (x, y, w, h))`` tuples, one for every
        (detection, class) pair whose probability is above zero, sorted by
        descending probability. The caller in this file treats ``x, y`` as
        the box centre.
    """
    num = c_int(0)
    pnum = pointer(num)
    res = []
    try:
        predict_image(net, im)
        dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum)
        count = pnum[0]
        try:
            if nms:
                do_nms_obj(dets, count, meta.classes, nms)
            for j in range(count):
                for i in range(meta.classes):
                    if dets[j].prob[i] > 0:
                        b = dets[j].bbox
                        res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h)))
        finally:
            # Release the detection array even if collecting results failed.
            free_detections(dets, count)
    finally:
        # The image is always released here, on success and on error.
        free_image(im)
    return sorted(res, key=lambda x: -x[1])
def nparray_to_image(img):
    """Convert a NumPy array into a library IMAGE via ``ndarray_image``.

    The array's data pointer (as unsigned bytes), shape and strides are
    handed to the C function.

    Args:
        img: NumPy array; assumed to be a uint8 image such as an OpenCV
            frame, since the data is passed as ``c_ubyte`` - TODO confirm
            for other callers.

    Returns:
        The IMAGE returned by the library.
    """
    data = img.ctypes.data_as(POINTER(c_ubyte))
    return ndarray_image(data, img.ctypes.shape, img.ctypes.strides)
# State shared between the mouse callback (draw_ROI) and the main display loop.
# tpPointsChoose collects the (x, y) points clicked so far.
tpPointsChoose = list()
# drawing / tempFlag are set by draw_ROI; panduan is switched by the main loop
# to decide whether detections are filtered by the drawn polygon.
drawing, tempFlag, panduan = False, False, False
def draw_ROI(event, x, y, flags, param):
    """Mouse callback that records polygon vertices for the region of interest.

    * Left button: append ``(x, y)`` to ``tpPointsChoose`` and remember it
      as ``point1``.
    * Right button: close the polygon by storing the clicked points in
      ``pts`` as an int32 array of shape (1, n, 2). Ignored while fewer
      than three points have been clicked, because such a set does not
      enclose any area.
    * Middle button: clear the clicked points.

    The ``tempFlag`` / ``drawing`` pair tells the main loop which of these
    three states is current.

    Args:
        event: OpenCV mouse event code.
        x: x coordinate of the event.
        y: y coordinate of the event.
        flags: unused.
        param: unused.
    """
    global point1, tpPointsChoose, pts, drawing, tempFlag
    if event == cv2.EVENT_LBUTTONDOWN:
        tempFlag = True
        drawing = False
        point1 = (x, y)
        tpPointsChoose.append((x, y))  # point to be drawn
    elif event == cv2.EVENT_RBUTTONDOWN:
        if len(tpPointsChoose) < 3:
            # Not a polygon yet; keep the current state unchanged.
            return
        tempFlag = True
        drawing = True
        pts = np.array([tpPointsChoose], np.int32)
        print(pts)
    elif event == cv2.EVENT_MBUTTONDOWN:
        tempFlag = False
        drawing = True
        tpPointsChoose = []
def isPoiWithinPoly(poi,poly):
    """Return True if point ``poi`` lies inside polygon ``poly`` (even-odd rule).

    A horizontal ray is cast from the point towards +x and the number of
    polygon edges it crosses is counted; an odd count means "inside".
    Each edge is treated as half-open in y (one endpoint strictly above the
    ray, the other not), so a ray passing exactly through a vertex is
    counted once, and horizontal edges are never counted.

    Args:
        poi: the point, as ``[x, y]``.
        poly: three-level sequence (nested lists or an array of shape
            (rings, n, 2)):
            ``[[[x1, y1], [x2, y2], ..., [xn, yn]], [[w1, t1], ..., [wk, tk]]]``.
            Every ring is closed implicitly; repeating the first vertex at
            the end is allowed and has no effect.

    Returns:
        bool: True when the number of crossings over all rings is odd.
        The result for a point lying exactly on an edge is not specified.
    """
    px, py = float(poi[0]), float(poi[1])
    inside = False
    for epoly in poly:  # each ring is a 2-D sequence [[x1, y1], ..., [xn, yn]]
        for i in range(len(epoly)):
            # Edge from the previous vertex to this one; index -1 wraps
            # around, which closes the ring.
            x1, y1 = float(epoly[i - 1][0]), float(epoly[i - 1][1])
            x2, y2 = float(epoly[i][0]), float(epoly[i][1])
            if (y1 > py) == (y2 > py):
                # Both endpoints on the same side of the ray (or the edge is
                # horizontal): no crossing.
                continue
            # x coordinate where the edge meets the ray's horizontal line.
            x_cross = x1 + (py - y1) * (x2 - x1) / (y2 - y1)
            if px < x_cross:
                inside = not inside
    return inside
if __name__ == "__main__":
    # Load the network and class metadata, then play the video while running
    # detection on every frame. Left/right/middle mouse clicks in the window
    # draw, close and clear a polygon (see draw_ROI); once a polygon is
    # closed, only boxes with a tested corner inside it are displayed.
    net = load_net("../cfg/yolov3-tiny.cfg".encode("utf-8"), "../backup/yolov3-tiny.weights".encode("utf-8"), 0)
    meta = load_meta("../cfg/coco.data".encode("utf-8"))
    cv2.namedWindow('video')
    cv2.setMouseCallback('video', draw_ROI)
    vid = cv2.VideoCapture('../video/input4.avi')
    fps = vid.get(cv2.CAP_PROP_FPS)
    size = (vid.get(cv2.CAP_PROP_FRAME_WIDTH), vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print("fps: {}\nsize: {}".format(fps, size))
    # Per-frame playback delay in seconds; raise the numerator to slow playback
    # down on fast machines. Guarded because fps can be reported as 0.
    vfps = 0 / fps if fps else 0
    try:
        while True:
            return_value, arr1 = vid.read()
            # Stop before touching the frame: it is not valid once reading fails.
            if not return_value:
                break
            arr = cv2.resize(arr1, (1440, 900), interpolation=cv2.INTER_CUBIC)

            # Draw the current ROI state onto the frame.
            # NOTE(review): the overlay is drawn before detection, so it is part
            # of the image handed to the detector - confirm this is intended.
            if tempFlag and not drawing:  # left click: points being added
                cv2.circle(arr, point1, 5, (0, 255, 0), 2)
                for start, end in zip(tpPointsChoose, tpPointsChoose[1:]):
                    cv2.line(arr, start, end, (255, 0, 0), 2)
            if tempFlag and drawing:  # right click: polygon closed
                cv2.polylines(arr, [pts], True, (0, 0, 255), thickness=2)
                panduan = True
            if not tempFlag and drawing:  # middle click: polygon cleared
                for start, end in zip(tpPointsChoose, tpPointsChoose[1:]):
                    cv2.line(arr, start, end, (0, 0, 255), 2)
                panduan = False

            im = nparray_to_image(arr)
            boxes = detect(net, meta, im)  # detect() frees im
            for label, _score, (cx, cy, bw, bh) in boxes:
                # Boxes are used as centre + size; convert to corner coordinates.
                xmin = cx - bw / 2
                ymin = cy - bh / 2
                xmax = cx + bw / 2
                ymax = cy + bh / 2
                if panduan:
                    # Keep the box only if its top-left or bottom-right corner
                    # lies inside the drawn polygon.
                    show = (isPoiWithinPoly([xmin, ymin], pts)
                            or isPoiWithinPoly([xmax, ymax], pts))
                else:
                    show = True
                if not show:
                    continue
                # Class names arrive as bytes on Python 3; decode so the label
                # is not rendered as "b'...'".
                text = label if isinstance(label, str) else label.decode("utf-8")
                cv2.rectangle(arr, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 0, 0), 3)
                cv2.putText(arr, text, (int(xmin), int(ymin)), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.8, color=(0, 0, 255), thickness=3)

            time.sleep(vfps)
            cv2.imshow('video', arr)
            if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to quit
                break
    finally:
        vid.release()
        cv2.destroyAllWindows()
注意:完成第 1 步的修改并重新 make 之后,需要把生成的 libdarknet.so 文件放到 darknet 的 python 文件夹里。
主要函数:
- draw_ROI:鼠标回调函数,用于在视频窗口上画多边形(左键加点、右键闭合、中键清除)。
- isPoiWithinPoly:判断坐标点是否在多边形内。原理可参考如下链接:https://blog.csdn.net/xjtdw/article/details/99692671