YOLO V3利用Python接口实现感兴趣区域的目标检测

最新推荐文章于 2024-04-08 10:30:42 发布

greatsam

最新推荐文章于 2024-04-08 10:30:42 发布

阅读量1.5k

点赞数 2

文章标签： python 深度学习

本文链接：https://blog.csdn.net/greatsam/article/details/107722909

版权

darknet自带的python接口用起来比原C代码要简单得多，而且比第三方写的各种keras、tensorflow版本优化得都要好，因此我帮大家整合了一个Python接口程序，可以在视频上任意画多边形，并检测多边形内的目标。接下来一步一步教大家怎么操作：

首先按照链接的教程简单修改原代码，主要是加函数：https://blog.csdn.net/phinoo/article/details/83009061
按照上一步修改完后确保是可以运行的。然后修改darknet下python文件的代码，如下：
具体操作为点鼠标左键依次画点，鼠标右键将所画点围成多边形，鼠标中键清除多边形。


from ctypes import *
import random
import cv2
import numpy as np
import time


def sample(probs):
    """Draw a random index, weighted by ``probs``.

    The weights are normalised by their sum, one uniform number in [0, 1]
    is drawn, and the weights are subtracted from it in order; the first
    index at which the remainder drops to zero or below is returned.  If
    that never happens, the last index is returned.
    """
    total = sum(probs)
    weights = [p / total for p in probs]
    remaining = random.uniform(0, 1)
    for index, weight in enumerate(weights):
        remaining = remaining - weight
        if remaining <= 0:
            return index
    return len(weights) - 1

def c_array(ctype, values):
    """Return a new ctypes array of ``ctype`` filled with ``values``.

    The array has exactly ``len(values)`` elements.
    """
    array_type = ctype * len(values)
    buf = array_type()
    buf[:] = values
    return buf

class BOX(Structure):
    """ctypes structure holding four C floats, in order: x, y, w, h.

    The main loop of this script treats (x, y) as the box centre and
    (w, h) as its full width and height.
    """

    _fields_ = [(name, c_float) for name in ("x", "y", "w", "h")]

class DETECTION(Structure):
    """ctypes structure for one detection returned by the library.

    ``detect`` reads ``bbox`` and the per-class ``prob`` array; the other
    fields are declared so that the structure layout is complete.
    """

    _fields_ = [
        ("bbox", BOX),
        ("classes", c_int),
        ("prob", POINTER(c_float)),
        ("mask", POINTER(c_float)),
        ("objectness", c_float),
        ("sort_class", c_int),
    ]


class IMAGE(Structure):
    """ctypes structure for an image: int fields w, h, c and a float pointer ``data``."""

    _fields_ = [
        ("w", c_int),
        ("h", c_int),
        ("c", c_int),
        ("data", POINTER(c_float)),
    ]

class METADATA(Structure):
    """ctypes structure with a class count and a pointer to C-string class names."""

    _fields_ = [
        ("classes", c_int),
        ("names", POINTER(c_char_p)),
    ]

    

# ---------------------------------------------------------------------------
# ctypes bindings for libdarknet.so.
#
# Each block below picks one exported function and declares its argument
# types (and, where set, its return type).  Network handles are passed as
# opaque c_void_p values.  Where no restype is assigned, ctypes falls back to
# its default of treating the return value as a C int.
# ---------------------------------------------------------------------------

# Load the shared library by bare file name, with RTLD_GLOBAL as the load mode.
lib = CDLL("libdarknet.so", RTLD_GLOBAL)
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int

# network_predict(net, float*) -> float*
predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)

# cuda_set_device(int)
set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]

# make_image(int, int, int) -> IMAGE (returned by value)
make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE

# get_network_boxes(...) -> DETECTION*.  detect() passes None for the first
# int pointer and reads the number of detections back through the last one.
get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)]
get_network_boxes.restype = POINTER(DETECTION)

# make_network_boxes(net) -> DETECTION*
make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)

# free_detections(DETECTION*, int); detect() calls it with the detection count.
free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]

# free_ptrs(void**, int)
free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]

# Second alias for the same export as `predict` above.  Attribute access on a
# CDLL caches the function object, so this re-assigns argtypes on that same
# object and the restype set earlier still applies.
network_predict = lib.network_predict
network_predict.argtypes = [c_void_p, POINTER(c_float)]

# reset_rnn(net)
reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]

# load_network(char*, char*, int) -> opaque network handle.  The script's
# entry point passes the cfg path, the weights path and 0.
load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p

# do_nms_obj(DETECTION*, int, int, float); detect() passes the detection
# count, the class count and its `nms` threshold.
do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

# do_nms_sort has the same signature as do_nms_obj.
do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

# free_image(IMAGE) -- the struct is passed by value.
free_image = lib.free_image
free_image.argtypes = [IMAGE]

# letterbox_image(IMAGE, int, int) -> IMAGE
letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE

# get_metadata(char*) -> METADATA (returned by value)
load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA

# load_image_color(char*, int, int) -> IMAGE
load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE

# rgbgr_image(IMAGE)
rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]

# network_predict_image(net, IMAGE) -> float*
predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)

# ndarray_to_image(unsigned char*, long*, long*) -> IMAGE.
# Presumably this is the function that the article's first step adds to
# darknet before rebuilding -- confirm against the rebuilt library.
# NOTE(review): nparray_to_image passes numpy's `ctypes.shape` / `ctypes.strides`
# arrays here; those only match POINTER(c_long) on platforms where numpy's
# index type has the same size as C long -- confirm on the target platform.
ndarray_image = lib.ndarray_to_image
ndarray_image.argtypes = [POINTER(c_ubyte), POINTER(c_long), POINTER(c_long)]
ndarray_image.restype = IMAGE


def classify(net, meta, im):
    """Run the network on ``im`` and return ``(name, score)`` pairs.

    One pair is produced for each of the ``meta.classes`` classes, taking
    the name from ``meta.names`` and the score from the prediction output.
    The list is ordered by descending score.
    """
    scores = predict_image(net, im)
    ranked = [(meta.names[idx], scores[idx]) for idx in range(meta.classes)]
    ranked.sort(key=lambda pair: -pair[1])
    return ranked

def detect(net, meta, im, thresh=.5, hier_thresh=.5, nms=.45):
    """Run detection on ``im`` and return the hits, best first.

    Each result is ``(name, prob, (x, y, w, h))``: the class name from
    ``meta.names``, the class probability, and the four ``bbox`` fields of
    the detection.  A detection yields one entry per class whose
    probability is greater than zero.  ``nms`` is forwarded to
    ``do_nms_obj`` unless it is falsy.

    Note that ``im`` is released with ``free_image`` before returning, so
    the caller must not reuse it.
    """
    count = c_int(0)
    count_ptr = pointer(count)
    predict_image(net, im)
    dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, count_ptr)
    total = count_ptr[0]
    if nms:
        do_nms_obj(dets, total, meta.classes, nms)

    found = []
    for det_idx in range(total):
        for cls_idx in range(meta.classes):
            if dets[det_idx].prob[cls_idx] > 0:
                box = dets[det_idx].bbox
                entry = (
                    meta.names[cls_idx],
                    dets[det_idx].prob[cls_idx],
                    (box.x, box.y, box.w, box.h),
                )
                found.append(entry)
    found = sorted(found, key=lambda entry: -entry[1])
    # Release the native buffers only after every value has been copied out.
    free_image(im)
    free_detections(dets, total)
    return found

def nparray_to_image(img):
    """Convert array ``img`` into an ``IMAGE`` via the library's ``ndarray_to_image``.

    The array's data pointer (viewed as unsigned bytes), its shape and its
    strides are handed to the native function as-is.
    """
    return ndarray_image(
        img.ctypes.data_as(POINTER(c_ubyte)),
        img.ctypes.shape,
        img.ctypes.strides,
    )

# Shared state between the mouse callback and the display loop.
# drawing / tempFlag together encode the last mouse action handled by draw_ROI:
#   left click   -> tempFlag True,  drawing False
#   right click  -> tempFlag True,  drawing True
#   middle click -> tempFlag False, drawing True
drawing = False
tempFlag = False
# Set by the display loop: True while detections are filtered by the polygon.
panduan = False
# Points clicked so far, as (x, y) tuples in window coordinates.
tpPointsChoose = []
def draw_ROI(event, x, y, flags, param):
    """OpenCV mouse callback that edits the region-of-interest polygon.

    * Left button: append ``(x, y)`` to ``tpPointsChoose`` and remember it
      as ``point1``.
    * Right button: close the polygon by storing the clicked points in
      ``pts`` as an int32 array of shape ``(1, n, 2)``.  Ignored until at
      least three points exist, because fewer points do not form a polygon
      and a degenerate ``pts`` cannot be drawn or tested against.
    * Middle button: discard all clicked points.

    ``flags`` and ``param`` are part of the callback signature and unused.
    """
    global point1, tpPointsChoose, pts, drawing, tempFlag
    if event == cv2.EVENT_LBUTTONDOWN:
        tempFlag = True
        drawing = False
        point1 = (x, y)
        tpPointsChoose.append((x, y))  # vertex of the polygon being drawn
    elif event == cv2.EVENT_RBUTTONDOWN:
        if len(tpPointsChoose) < 3:
            # Not a polygon yet: keep the current state so the user can go
            # on adding vertices.
            return
        tempFlag = True
        drawing = True
        pts = np.array([tpPointsChoose], np.int32)
        print(pts)
    elif event == cv2.EVENT_MBUTTONDOWN:
        tempFlag = False
        drawing = True
        tpPointsChoose = []

def isPoiWithinPoly(poi, poly):
    """Return True if point ``poi`` lies inside ``poly`` (even-odd rule).

    Args:
        poi: the point, indexable as ``poi[0]`` (x) and ``poi[1]`` (y).
        poly: a sequence of rings, each ring a sequence of ``[x, y]``
            vertices, i.e. a 3-D structure such as
            ``[[[x1, y1], ..., [xn, yn]], [[w1, t1], ..., [wk, tk]]]``.
            A ring may repeat its first vertex at the end or leave it out;
            it is treated as closed either way.

    Returns:
        bool: True when a horizontal ray cast from ``poi`` towards +x
        crosses the ring edges an odd number of times.  Crossings are
        summed over all rings, so a ring nested in another acts as a hole.
        Rings with fewer than two distinct vertices contribute nothing.
        Points lying exactly on an edge may be reported either way.
    """
    px = float(poi[0])
    py = float(poi[1])
    crossings = 0
    for ring in poly:
        for i in range(len(ring)):
            # Edge from the previous vertex to this one; index -1 wraps
            # around, which closes the ring.
            x1, y1 = float(ring[i - 1][0]), float(ring[i - 1][1])
            x2, y2 = float(ring[i][0]), float(ring[i][1])
            # Half-open rule: an edge counts only if its end points are on
            # different sides of the ray's line, where "on the line" is
            # grouped with "below".  This counts a vertex touched by the
            # ray exactly once when the boundary really crosses there and
            # skips horizontal edges (so y2 - y1 is never zero below).
            if (y1 > py) == (y2 > py):
                continue
            x_cross = x1 + (py - y1) * (x2 - x1) / (y2 - y1)
            if px < x_cross:
                crossings += 1
    return crossings % 2 == 1
    
if __name__ == "__main__":
    # Demo: run the detector on every frame of a video and draw the results.
    # Left-click adds polygon vertices, right-click closes the polygon (only
    # detections touching it are then drawn), middle-click clears it, and
    # pressing "q" quits.
    net = load_net("../cfg/yolov3-tiny.cfg".encode("utf-8"), "../backup/yolov3-tiny.weights".encode("utf-8"), 0)
    meta = load_meta("../cfg/coco.data".encode("utf-8"))
    cv2.namedWindow('video')
    cv2.setMouseCallback('video', draw_ROI)
    vid = cv2.VideoCapture('../video/input4.avi')
    fps = vid.get(cv2.CAP_PROP_FPS)
    size = (vid.get(cv2.CAP_PROP_FRAME_WIDTH), vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print("fps: {}\nsize: {}".format(fps, size))
    # Extra delay per frame in seconds; raise the numerator to slow playback
    # down.  Guarded because a capture that failed to open reports 0 fps.
    vfps = 0 / fps if fps > 0 else 0

    try:
        while True:
            return_value, arr1 = vid.read()
            # Stop before touching the frame: at end of stream (or on a read
            # error) the frame is not valid and must not be resized.
            if not return_value:
                break
            arr = cv2.resize(arr1, (1440, 900), interpolation=cv2.INTER_CUBIC)

            if tempFlag and not drawing:  # after a left click: polygon in progress
                cv2.circle(arr, point1, 5, (0, 255, 0), 2)
                for start, end in zip(tpPointsChoose, tpPointsChoose[1:]):
                    cv2.line(arr, start, end, (255, 0, 0), 2)
            elif tempFlag and drawing:  # after a right click: polygon closed
                cv2.polylines(arr, [pts], True, (0, 0, 255), thickness=2)
                panduan = True
            elif drawing:  # after a middle click: polygon cleared
                for start, end in zip(tpPointsChoose, tpPointsChoose[1:]):
                    cv2.line(arr, start, end, (0, 0, 255), 2)
                panduan = False

            # detect() frees the IMAGE it is given, so one is built per frame.
            im = nparray_to_image(arr)
            boxes = detect(net, meta, im)
            for label, _score, (cx, cy, bw, bh) in boxes:
                xmin = cx - bw / 2
                ymin = cy - bh / 2
                xmax = cx + bw / 2
                ymax = cy + bh / 2
                if panduan:
                    # With an active polygon, keep a box only if its top-left
                    # or bottom-right corner lies inside the polygon.
                    corner_inside = (isPoiWithinPoly([xmin, ymin], pts)
                                     or isPoiWithinPoly([xmax, ymax], pts))
                    if not corner_inside:
                        continue
                # Class names come back from ctypes as bytes; decode them so
                # the overlay shows "person" rather than "b'person'".
                text = label.decode("utf-8", "replace") if isinstance(label, bytes) else str(label)
                top_left = (int(xmin), int(ymin))
                cv2.rectangle(arr, top_left, (int(xmax), int(ymax)), (255, 0, 0), 3)
                cv2.putText(arr, text, top_left, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.8, color=(0, 0, 255), thickness=3)

            time.sleep(vfps)
            cv2.imshow('video', arr)
            if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to quit
                break
    finally:
        # Always give the capture and the window back, even on an error.
        vid.release()
        cv2.destroyAllWindows()

注意：第1步修改完成后，重新make生成的libdarknet.so文件需要放到darknet下的python文件夹里。

主要函数：