视频检索
本文采用了三种方法进行视频检索
1 根据图像检索视频关键帧的算法
2 采用 videosearch 的方式进行检索
3 采用yolo视频目标检测的算法进行检索
下面针对三种方法进行详细说明
根据图像检索视频关键帧的算法
step1:提取新闻片头关键帧
step2:提取整个新闻的关键帧
step3:定义图像的相似度匹配算法
step4:根据相似度匹配算法匹配关键帧
代码如下:
#关键帧提取算法
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 4 16:48:57 2018
keyframes extract tool
this key frame extract algorithm is based on interframe difference.
The principle is very simple
First, we load the video and compute the interframe difference between each frames
Then, we can choose one of these three methods to extract keyframes, which are
all based on the difference method:
1. use the difference order
The first few frames with the largest average interframe difference
are considered to be key frames.
2. use the difference threshold
The frames whose average interframe difference is larger than the
threshold are considered to be key frames.
3. use local maximum
The frames whose average interframe difference is a local maximum are
considered to be key frames.
It should be noted that smoothing the average difference value before
calculating the local maximum can effectively remove noise to avoid
repeated extraction of frames of similar scenes.
After a few experiment, the third method has a better key frame extraction effect.
The original code comes from the link below, I optimized the code to reduce
unnecessary memory consumption.
https://blog.csdn.net/qq_21997625/article/details/81285096
@author: zyb_as
"""
import cv2
import operator
import numpy as np
import matplotlib.pyplot as plt
import sys
from scipy.signal import argrelextrema
def smooth(x, window_len=13, window='hanning'):
    """Smooth a 1-D signal by convolving it with a normalised window.

    The signal is extended at both ends with point-reflected copies of itself
    (``window_len - 1`` samples per side) so that transients at the borders
    are minimised, convolved with the window, and trimmed back to the input
    length.

    Args:
        x: 1-D array-like input signal. It must contain more than
            ``window_len`` samples, otherwise the reflected padding cannot be
            built and the result would not have the input's length.
        window_len: Size of the smoothing window. Values below 3 cannot
            smooth anything, so the input is returned unchanged.
        window: One of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'.
            'flat' produces a moving average.

    Returns:
        numpy.ndarray of the same length as ``x``.

    Raises:
        ValueError: If ``x`` is not one-dimensional, is not longer than
            ``window_len``, or ``window`` is not a supported name.

    Example:
        t = np.arange(-2, 2, 0.1)
        x = np.sin(t) + np.random.randn(len(t)) * 0.1
        y = smooth(x)

    See also:
        numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman,
        numpy.convolve, scipy.signal.lfilter
    """
    x = np.asarray(x)
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")
    if x.size <= window_len:
        raise ValueError("Input vector needs to be bigger than window size.")
    if window_len < 3:
        # A 1- or 2-sample window either trims everything away or has a zero
        # sum (NaN after normalisation); treat it as "no smoothing".
        return x
    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError(
            "Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Point-reflect the signal about its first and last sample.
    padded = np.r_[2 * x[0] - x[window_len:1:-1],
                   x,
                   2 * x[-1] - x[-1:-window_len:-1]]
    if window == 'flat':  # moving average
        w = np.ones(window_len, 'd')
    else:
        w = getattr(np, window)(window_len)
    y = np.convolve(w / w.sum(), padded, mode='same')
    # Drop the padding again so the output lines up with the input.
    return y[window_len - 1:-window_len + 1]
class Frame:
    """Pair a frame index with its inter-frame difference score.

    Equality, ordering and hashing are all based on ``id`` alone, so two
    ``Frame`` objects with the same index compare equal regardless of
    ``diff``.
    """

    def __init__(self, id, diff):
        # ``id`` shadows the builtin but is part of the constructor's
        # interface, so the parameter name is kept.
        self.id = id
        self.diff = diff

    def __repr__(self):
        return "Frame(id={!r}, diff={!r})".format(self.id, self.diff)

    def __lt__(self, other):
        if not isinstance(other, Frame):
            return NotImplemented
        return self.id < other.id

    def __gt__(self, other):
        if not isinstance(other, Frame):
            return NotImplemented
        return other.id < self.id

    def __eq__(self, other):
        if not isinstance(other, Frame):
            return NotImplemented
        return self.id == other.id

    def __ne__(self, other):
        if not isinstance(other, Frame):
            return NotImplemented
        return self.id != other.id

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash on the
        # same key that equality uses.
        return hash(self.id)
def rel_change(a, b):
    """Return the change from ``a`` to ``b`` relative to the larger of the two.

    Args:
        a: Previous value.
        b: Current value.

    Returns:
        ``(b - a) / max(a, b)``, or ``0.0`` when the larger value is zero
        (e.g. two identical consecutive frames), which previously raised
        ``ZeroDivisionError``.
    """
    denominator = max(a, b)
    if denominator == 0:
        return 0.0
    return (b - a) / denominator
if __name__ == "__main__":
    import os  # only the script entry point needs it (output directory)

    print(sys.executable)
    # Criterion: relative change of the diff versus the previous one >= THRESH.
    USE_THRESH = False
    # Fixed threshold value used by USE_THRESH.
    THRESH = 0.6
    # Criterion: the NUM_TOP_FRAMES frames with the largest diff.
    USE_TOP_ORDER = False
    # Criterion: local maxima of the smoothed diff curve.
    USE_LOCAL_MAXIMA = True
    # Number of top sorted frames.
    NUM_TOP_FRAMES = 50
    # Video path of the source file.
    videopath = 'myvedio.flv'
    # Directory to store the processed frames.
    output_dir = './myvedio_extract_result/'
    # Smoothing window size.
    len_window = 50

    print("target video :" + videopath)
    print("frame save directory: " + output_dir)
    # cv2.imwrite / savefig do not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    # Pass 1: load the video and compute the diff between consecutive frames.
    cap = cv2.VideoCapture(str(videopath))
    if not cap.isOpened():
        sys.exit("cannot open video: " + videopath)
    prev_frame = None
    frame_diffs = []
    frames = []
    frame_id = 0
    success, image = cap.read()
    while success:
        curr_frame = cv2.cvtColor(image, cv2.COLOR_BGR2LUV)
        if prev_frame is not None:
            diff = cv2.absdiff(curr_frame, prev_frame)
            # Mean absolute difference per pixel (summed over channels).
            diff_sum_mean = np.sum(diff) / (diff.shape[0] * diff.shape[1])
            frame_diffs.append(diff_sum_mean)
            frames.append(Frame(frame_id, diff_sum_mean))
        prev_frame = curr_frame
        frame_id += 1
        success, image = cap.read()
    cap.release()

    # Select keyframe ids according to the enabled criteria.
    keyframe_id_set = set()
    if USE_TOP_ORDER:
        # Sort a copy: ``frames`` must stay in temporal order because the
        # other criteria index into it by position.
        by_diff = sorted(frames, key=operator.attrgetter("diff"), reverse=True)
        for keyframe in by_diff[:NUM_TOP_FRAMES]:
            keyframe_id_set.add(keyframe.id)
    if USE_THRESH:
        print("Using Threshold")
        for previous, current in zip(frames, frames[1:]):
            # float() replaces np.float, which was removed from NumPy.
            if rel_change(float(previous.diff), float(current.diff)) >= THRESH:
                keyframe_id_set.add(current.id)
    if USE_LOCAL_MAXIMA:
        print("Using Local Maxima")
        diff_array = np.array(frame_diffs)
        if diff_array.size <= len_window:
            # The smoothing window needs more samples than the video provides.
            print("video too short for a smoothing window of", len_window)
        else:
            sm_diff_array = smooth(diff_array, len_window)
            frame_indexes = argrelextrema(sm_diff_array, np.greater)[0]
            for i in frame_indexes:
                keyframe_id_set.add(frames[i - 1].id)
            plt.figure(figsize=(40, 20))
            # The default tick locator takes ``nbins``; ``numticks`` is only
            # understood by other locator types.
            plt.locator_params(nbins=100)
            plt.stem(sm_diff_array)
            plt.savefig(os.path.join(output_dir, 'plot.png'))

    # Pass 2: re-read the video and save every selected keyframe as an image.
    cap = cv2.VideoCapture(str(videopath))
    pending = set(keyframe_id_set)
    idx = 0
    success, image = cap.read()
    while success and pending:
        if idx in pending:
            name = "keyframe_" + str(idx) + ".jpg"
            cv2.imwrite(os.path.join(output_dir, name), image)
            pending.remove(idx)
        idx += 1
        success, image = cap.read()
    cap.release()
计算图片的相似度
#计算图片的相似度距离
# -*- coding: utf-8 -*-
# !/usr/bin/env python
# @Time : 2018/11/17 14:52
# @Author : xhh
# @Desc : 余弦相似度计算
# @File : difference_image_consin.py
# @Software: PyCharm
from PIL import Image
from numpy import average, dot, linalg
# Normalise an image to a common size (and optionally to greyscale).
def get_thum(image, size=(64,64), greyscale=False):
    """Return a resized copy of ``image`` for comparison.

    Args:
        image: PIL image to normalise.
        size: Target ``(width, height)`` passed to ``Image.resize``.
        greyscale: When true, convert the result to mode 'L' (8-bit grey).

    Returns:
        The resized (and possibly converted) PIL image.
    """
    # Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10;
    # LANCZOS selects the same high-quality filter on old and new releases.
    image = image.resize(size, Image.LANCZOS)
    if greyscale:
        image = image.convert('L')
    return image
# Cosine similarity between two images.
def image_similarity_vectors_via_numpy(image1, image2):
    """Return the cosine similarity of two images' per-pixel intensities.

    Both images are first normalised with ``get_thum``; every pixel is then
    reduced to the mean of its channel values and the two resulting vectors
    are compared.

    Args:
        image1: First PIL image.
        image2: Second PIL image.

    Returns:
        The dot product of the two L2-normalised intensity vectors. ``0.0``
        is returned when either image has zero norm (all pixels 0), which
        previously produced NaN and corrupted later ``max()`` comparisons.
    """
    vectors = []
    norms = []
    for image in (get_thum(image1), get_thum(image2)):
        # One intensity per pixel: the mean over that pixel's channels.
        vector = [average(pixel_tuple) for pixel_tuple in image.getdata()]
        vectors.append(vector)
        # L2 norm of the intensity vector.
        norms.append(linalg.norm(vector, 2))
    a, b = vectors
    a_norm, b_norm = norms
    if a_norm == 0 or b_norm == 0:
        return 0.0
    return dot(a / a_norm, b / b_norm)
#'''
#image1 = Image.open('images/1.jpeg')
#image2 = Image.open('myimage64.jpg')
#cosin = image_similarity_vectors_via_numpy(image1, image2)
#print('图片余弦相似度',cosin)
关键帧匹配算法
import os

# Query image that is looked up among the extracted keyframes.
image1 = Image.open('11.jpg')
keyframe_dir = 'myvedio_extract_result'
cosin1 = []
imagematch = []
for filename in os.listdir(keyframe_dir):
    # The extraction step also writes plot.png into this directory; only the
    # files it names "keyframe_<index>.jpg" are candidates.
    if not filename.startswith('keyframe_'):
        continue
    # Close each candidate after scoring so file handles are not leaked.
    with Image.open(os.path.join(keyframe_dir, filename)) as image2:
        cosin = image_similarity_vectors_via_numpy(image1, image2)
    cosin1.append(cosin)
    imagematch.append(filename)
if cosin1:
    best = max(cosin1)
    print(best)
    index = cosin1.index(best)
    print(index)
    print(imagematch[index])
else:
    # max() on an empty list would raise ValueError.
    print("no keyframes found in " + keyframe_dir)
#train_dir +"/" + train_image_names[0]
这是最后的结果
1.0 —— 最大余弦相似度为 1.0,即查询图像与该关键帧完全匹配
258 —— 匹配到的关键帧在文件列表中的下标(从 0 开始)
keyframe_56.jpg —— 匹配到的关键帧文件,对应视频中下标为 56 的帧
采用 videosearch 的方式进行检索
开源视频检索技术 videosearch
https://blog.csdn.net/meloyi/article/details/53034823
https://github.com/andrefaraujo/videosearch
本项目主要做的事情是:
1 提取视频的关键帧(关键帧,其实就是视频中的一张图像)。 对视频进行镜头边缘检测。
2 对图片或者帧提取SIFT(尺度不变特征变换,用于在图像中检测出关键点,是一种局部特征描述子)。
3 为每张图片/关键帧、镜头或者视频片段 提取全局描述子(Fisher Vectors)。
4 使用Bloom Filters对每个视频片段进行索引。
5 使用图片对图片或者视频数据库进行检索。
6 用区间为0~1的平均精准度和精准度来评估检索结果。
本项目代码可以优化,得到最后的视频匹配效果
按照参考资料中的代码进行视频检索,可以得到很高的匹配度,但无法做到实时匹配
采用yolo视频目标检测的算法进行检索
修改yolov3目标检测到视频的目标检测
step1 安装 CUDA、cuDNN、Darknet
step2 运行darknet 图像的目标检测
step3 修改darknet 可以运行视频的目标检测
详细步骤如下:
- 下载代码:
git clone https://github.com/pjreddie/darknet
- 编译代码
cd darknet
make
- 下载权重文件
wget https://pjreddie.com/media/files/yolov3.weights
- 修改代码运行
我们首先需要将“darknet”文件夹内的“libdarknet.so”文件移动到“darknet/python”内
打开“darknet/cfg/coco.data”文件,将其中的“names”路径改为绝对路径
进入“darknet/python”然后执行“darknet.py”文件即可
- 修改代码进行视频的目标检测
1.修改src/image.c
#ifdef NUMPY
/*
 * Build a darknet image from a raw 8-bit array buffer.
 *
 * src     : first byte of the array data
 * shape   : {height, width, channels}
 * strides : per-axis step, applied directly as an offset into src
 *
 * Pixels are written channel-major (index = ch*w*h + row*w + col) and
 * scaled from 0..255 to 0..1. rgbgr_image() is applied to the result before
 * it is returned (presumably swapping the R and B channels - confirm in
 * image.c).
 */
image ndarray_to_image(unsigned char* src, long* shape, long* strides)
{
    int h = shape[0];
    int w = shape[1];
    int c = shape[2];
    int step_h = strides[0];
    int step_w = strides[1];
    int step_c = strides[2];
    image im = make_image(w, h, c);
    int row, col, ch;

    for (ch = 0; ch < c; ++ch) {
        for (row = 0; row < h; ++row) {
            for (col = 0; col < w; ++col) {
                int dst_index = ch*w*h + row*w + col;
                int src_index = step_h*row + step_w*col + step_c*ch;
                im.data[dst_index] = src[src_index]/255.;
            }
        }
    }

    rgbgr_image(im);
    return im;
}
#endif
2.然后在src/image.h大概22行插入:
#ifdef NUMPY
/* Builds a darknet image from a raw array buffer; compiled only when the
 * build defines NUMPY. shape and strides each hold three entries. */
image ndarray_to_image(unsigned char* src, long* shape, long* strides);
#endif
3.修改Makefile文件:
# Feature switches; each one is tested with ifeq further down in this file.
GPU=1
CUDNN=1
OPENCV=1
# Added: enables the -DNUMPY section that compiles ndarray_to_image.
NUMPY=1
OPENMP=1
# NOTE: DEBUG=1 replaces OPTS=-Ofast with "-O0 -g" in the ifeq block below.
DEBUG=1
# GPU architectures handed to nvcc when compiling the .cu sources.
ARCH= -gencode arch=compute_30,code=sm_30 \
-gencode arch=compute_35,code=sm_35 \
-gencode arch=compute_50,code=[sm_50,compute_50] \
-gencode arch=compute_52,code=[sm_52,compute_52] \
-gencode arch=compute_70,code=[sm_70,compute_70] \
-gencode arch=compute_75,code=[sm_75,compute_75]
# -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated?
# This is what I use, uncomment if you know your arch and want to specify
# ARCH= -gencode arch=compute_52,code=compute_52
# Source search path, output names and toolchain.
VPATH=./src/:./examples
SLIB=libdarknet.so
ALIB=libdarknet.a
EXEC=darknet
OBJDIR=./obj/
CC=gcc
CPP=g++
NVCC=nvcc
AR=ar
ARFLAGS=rcs
# Default optimisation level; overridden below when DEBUG=1.
OPTS=-Ofast
LDFLAGS= -lm -pthread
# COMMON is passed to every compiler invocation, including nvcc.
COMMON= -Iinclude/ -Isrc/
# -fPIC is required because the objects are also linked into $(SLIB).
CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC
ifeq ($(OPENMP), 1)
CFLAGS+= -fopenmp
endif
ifeq ($(DEBUG), 1)
OPTS=-O0 -g
endif
CFLAGS+=$(OPTS)
ifeq ($(OPENCV), 1)
COMMON+= -DOPENCV
CFLAGS+= -DOPENCV
LDFLAGS+= `pkg-config --libs opencv` -lstdc++
COMMON+= `pkg-config --cflags opencv`
endif
# Added: defines NUMPY and adds Python / NumPy header directories.
# NOTE(review): the include paths are hard-coded to one user's Anaconda
# environment (python3.6) and must be adapted on any other machine.
ifeq ($(NUMPY), 1)
COMMON+= -DNUMPY -I/home/sbs/anaconda3/envs/tracy/include/python3.6m/ -I/home/sbs/anaconda3/envs/tracy/lib/python3.6/site-packages/numpy/core/include/numpy/
CFLAGS+= -DNUMPY
endif
# NOTE(review): the CUDA include/lib directories below are hard-coded to a
# specific local install (cuda-10.0-cudnn-7.3.1); adapt as needed.
ifeq ($(GPU), 1)
COMMON+= -DGPU -I/usr/local/cuda-10.0-cudnn-7.3.1/include/
CFLAGS+= -DGPU
LDFLAGS+= -L/usr/local/cuda-10.0-cudnn-7.3.1/lib64 -lcuda -lcudart -lcublas -lcurand
endif
ifeq ($(CUDNN), 1)
COMMON+= -DCUDNN
CFLAGS+= -DCUDNN
LDFLAGS+= -lcudnn
endif
# NOTE(review): recipe lines must start with a TAB in a real Makefile; the
# leading whitespace appears to have been lost in this listing - confirm
# before using it.
# Objects that make up the library (static and shared).
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o l2norm_layer.o yolo_layer.o iseg_layer.o image_opencv.o
# Objects that are only linked into the command-line executable.
EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o instance-segmenter.o darknet.o
# CUDA kernels are added to the library only for GPU builds.
ifeq ($(GPU), 1)
LDFLAGS+= -lstdc++
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o
endif
EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA))
OBJS = $(addprefix $(OBJDIR), $(OBJ))
# Every object depends on all headers and on this Makefile itself.
DEPS = $(wildcard src/*.h) Makefile include/darknet.h
# Default target: create the output directories, then build the shared
# library, the static library and the executable.
all: obj backup results $(SLIB) $(ALIB) $(EXEC)
#all: obj results $(SLIB) $(ALIB) $(EXEC)
$(EXEC): $(EXECOBJ) $(ALIB)
$(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(ALIB)
$(ALIB): $(OBJS)
$(AR) $(ARFLAGS) $@ $^
# The shared library is the one loaded by python/darknet.py.
$(SLIB): $(OBJS)
$(CC) $(CFLAGS) -shared $^ -o $@ $(LDFLAGS)
# Pattern rules for C++, C and CUDA sources.
$(OBJDIR)%.o: %.cpp $(DEPS)
$(CPP) $(COMMON) $(CFLAGS) -c $< -o $@
$(OBJDIR)%.o: %.c $(DEPS)
$(CC) $(COMMON) $(CFLAGS) -c $< -o $@
$(OBJDIR)%.o: %.cu $(DEPS)
$(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
# Output directories.
obj:
mkdir -p obj
backup:
mkdir -p backup
results:
mkdir -p results
.PHONY: clean
clean:
rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ) $(OBJDIR)/*
4.然后make clean再重新make -j8就可以
5.修改python/darknet.py文件,这里我直接贴完整的程序了
from ctypes import *
import math
import random
import time
import numpy as np
import cv2
import os
import sys
def sample(probs):
    """Draw a random index, weighted by the (unnormalised) ``probs``.

    The weights are normalised to sum to one, a uniform number in [0, 1] is
    drawn, and the weights are subtracted from it in order until it is used
    up.

    Args:
        probs: Sequence of non-negative weights.

    Returns:
        The selected index; the last index if rounding leaves a remainder.
    """
    total = sum(probs)
    weights = [value / total for value in probs]
    remaining = random.uniform(0, 1)
    for index, weight in enumerate(weights):
        remaining = remaining - weight
        if remaining <= 0:
            return index
    return len(weights) - 1
def c_array(ctype, values):
    """Return a ctypes array of ``ctype`` initialised from ``values``.

    Args:
        ctype: ctypes element type, e.g. ``c_float``.
        values: Sized sequence of values convertible to ``ctype``.

    Returns:
        A new ``ctype * len(values)`` array holding the values.
    """
    array_type = ctype * len(values)
    return array_type(*values)
class BOX(Structure):
    """ctypes layout of a bounding box: four C floats x, y, w, h.

    NOTE(review): ``convertBack`` in this file treats (x, y) as the box
    centre and (w, h) as the full width and height - confirm against the
    native library.
    """
    _fields_ = [("x", c_float),
                ("y", c_float),
                ("w", c_float),
                ("h", c_float)]
class DETECTION(Structure):
    """ctypes layout of one detection as returned by ``get_network_boxes``.

    ``detect`` and ``detect_im`` read ``bbox`` and index ``prob`` by class
    id (``prob[i]`` for ``i < meta.classes``). The field order and types
    must match the native struct exactly.
    """
    _fields_ = [("bbox", BOX),
                ("classes", c_int),
                ("prob", POINTER(c_float)),
                ("mask", POINTER(c_float)),
                ("objectness", c_float),
                ("sort_class", c_int)]
class IMAGE(Structure):
    """ctypes layout of an image: width, height, channels and a float buffer.

    ``array_to_image`` fills ``data`` with float32 values laid out
    channel-first and scaled to 0..1.
    """
    _fields_ = [("w", c_int),
                ("h", c_int),
                ("c", c_int),
                ("data", POINTER(c_float))]
class METADATA(Structure):
    """ctypes layout of dataset metadata: class count and class-name array.

    ``names[i]`` is a C string (bytes in Python) for class id ``i``.
    """
    _fields_ = [("classes", c_int),
                ("names", POINTER(c_char_p))]
# Load the shared library and declare the C signatures used by this module.
# The path is relative to the current working directory.
#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
lib = CDLL("../libdarknet.so", RTLD_GLOBAL)
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int

# Raw forward pass on a float buffer.
predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)

set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]

make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE

# Detection retrieval; the last argument receives the number of detections.
get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)]
get_network_boxes.restype = POINTER(DETECTION)

make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)

free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]

free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]

# Second name for the same library symbol as ``predict`` above.
network_predict = lib.network_predict
network_predict.argtypes = [c_void_p, POINTER(c_float)]

reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]

load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p

# Non-maximum suppression variants.
do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

free_image = lib.free_image
free_image.argtypes = [IMAGE]

letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE

load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA

load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE

# Added for video processing: requires a library built with NUMPY=1,
# otherwise the ndarray_to_image symbol lookup below fails.
ndarray_image = lib.ndarray_to_image
ndarray_image.argtypes = [POINTER(c_ubyte), POINTER(c_long), POINTER(c_long)]
ndarray_image.restype = IMAGE

rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]

predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)
def classify(net, meta, im):
    """Score an image against every class and rank the classes.

    Args:
        net: Network handle returned by ``load_net``.
        meta: ``METADATA`` providing ``classes`` and ``names``.
        im: ``IMAGE`` to classify.

    Returns:
        List of ``(name, score)`` tuples ordered by descending score.
    """
    scores = predict_image(net, im)
    ranked = [(meta.names[i], scores[i]) for i in range(meta.classes)]
    ranked.sort(key=lambda entry: -entry[1])
    return ranked
"""
Yolo-v3目前耗时过长的步骤
1.输入图像的预处理阶段
2.python接口调用网络执行一次推理过程
"""
def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45):
    """Detect objects in an image file.

    Timings noted by the original author: image loading ~0.11 s, inference
    ~0.13 s, box retrieval ~0.002 s, NMS negligible.

    Args:
        net: Network handle returned by ``load_net``.
        meta: ``METADATA`` providing ``classes`` and ``names``.
        image: Image path handed to ``load_image`` (callers in this file
            pass UTF-8 encoded bytes).
        thresh: Threshold forwarded to ``get_network_boxes``.
        hier_thresh: Hierarchical threshold forwarded to
            ``get_network_boxes``.
        nms: NMS threshold; a falsy value skips non-maximum suppression.

    Returns:
        List of ``(name, score, (x, y, w, h))`` tuples ordered by descending
        score, one per class with a positive score in each detection.
    """
    im = load_image(image, 0, 0)
    count = c_int(0)
    count_ptr = pointer(count)
    predict_image(net, im)
    dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, count_ptr)
    total = count_ptr[0]
    if nms:
        do_nms_obj(dets, total, meta.classes, nms)

    found = []
    for j in range(total):
        for i in range(meta.classes):
            score = dets[j].prob[i]
            if score > 0:
                box = dets[j].bbox
                found.append((meta.names[i], score, (box.x, box.y, box.w, box.h)))
    found.sort(key=lambda entry: -entry[1])

    # Both buffers were allocated by the native library.
    free_image(im)
    free_detections(dets, total)
    return found
# Added for video processing.
def detect_im(net, meta, im, thresh=.5, hier_thresh=.5, nms=.45):
    """Detect objects in an in-memory frame.

    Timings noted by the original author: array conversion ~0.0013 s,
    channel swap ~0.0013 s, inference ~0.083 s.

    Args:
        net: Network handle returned by ``load_net``.
        meta: ``METADATA`` providing ``classes`` and ``names``.
        im: Frame as an array accepted by ``array_to_image`` (callers in
            this file pass frames read by ``cv2.VideoCapture``).
        thresh: Threshold forwarded to ``get_network_boxes``.
        hier_thresh: Hierarchical threshold forwarded to
            ``get_network_boxes``.
        nms: NMS threshold; a falsy value skips non-maximum suppression.

    Returns:
        List of ``(name, score, (x, y, w, h))`` tuples ordered by descending
        score, one per class with a non-zero score in each detection.
    """
    # ``backing`` is the NumPy array whose memory ``image.data`` points into.
    # It must stay referenced until inference is done, and the buffer must
    # NOT be passed to free_image() because the native library does not own
    # it. (The old ``isinstance(image, bytes)`` branch could never be true.)
    image, backing = array_to_image(im)
    rgbgr_image(image)
    count = c_int(0)
    count_ptr = pointer(count)
    predict_image(net, image)
    dets = get_network_boxes(net, image.w, image.h, thresh, hier_thresh, None, 0, count_ptr)
    total = count_ptr[0]
    try:
        if nms:
            do_nms_obj(dets, total, meta.classes, nms)
        res = []
        for j in range(total):
            probs = dets[j].prob[0:meta.classes]
            box = dets[j].bbox
            for i, score in enumerate(probs):
                if score:
                    res.append((meta.names[i], score,
                                (box.x, box.y, box.w, box.h)))
        res.sort(key=lambda entry: -entry[1])
        return res
    finally:
        # Release the native detections even if post-processing fails.
        free_detections(dets, total)
def array_to_image(arr):
    """Wrap a height x width x channels array as an ``IMAGE``.

    The array is reordered channel-first, flattened to contiguous float32
    and divided by 255.

    Args:
        arr: 3-D NumPy array indexed as (row, column, channel).

    Returns:
        ``(im, data)`` where ``im.data`` points into ``data``. The caller
        must keep ``data`` referenced for as long as ``im`` is in use,
        otherwise Python may free the memory behind the pointer.
    """
    channel_first = arr.transpose(2, 0, 1)
    channels, height, width = channel_first.shape[0:3]
    pixels = np.ascontiguousarray(channel_first.flat, dtype=np.float32) / 255.0
    pointer_to_pixels = pixels.ctypes.data_as(POINTER(c_float))
    return IMAGE(width, height, channels, pointer_to_pixels), pixels
def get_folderImages(folder):
    """Return the path of every entry in ``folder``, each prefixed with it.

    Args:
        folder: Directory to list.

    Returns:
        List of ``os.path.join(folder, entry)`` in ``os.listdir`` order.
        Entries are not filtered by type or extension.
    """
    return [os.path.join(folder, entry) for entry in os.listdir(folder)]
def convertBack(x, y, w, h):
    """Convert a centre/size box into integer corner coordinates.

    Args:
        x: Centre x coordinate.
        y: Centre y coordinate.
        w: Box width.
        h: Box height.

    Returns:
        ``(xmin, ymin, xmax, ymax)``, each rounded with ``round`` and cast
        to ``int``.
    """
    half_w = w / 2
    half_h = h / 2
    left, right = int(round(x - half_w)), int(round(x + half_w))
    top, bottom = int(round(y - half_h)), int(round(y + half_h))
    return left, top, right, bottom
def init():
    """Load the YOLOv3 network and the COCO metadata.

    All three paths are relative to the current working directory.

    Returns:
        ``(net, meta)``: the network handle and the ``METADATA`` struct.
    """
    cfg_path = "../cfg/yolov3.cfg".encode("utf-8")
    weights_path = "../cfg/yolov3.weights".encode("utf-8")
    data_path = "../cfg/coco.data".encode("utf-8")
    return load_net(cfg_path, weights_path, 0), load_meta(data_path)
def image_processing():
    """Run detection on every image in ``images/`` and save annotated copies.

    Each image gets a yellow box and a red class label per detection and is
    written to ``results/<name>-result.jpg``. The average detection time is
    printed at the end.
    """
    net, meta = init()
    folder = "images"
    save_folder = "results"
    # cv2.imwrite fails silently when the target directory is missing.
    os.makedirs(save_folder, exist_ok=True)
    each_process_time = []
    for image_path in get_folderImages(folder):
        image = cv2.imread(image_path)
        if image is None:
            # Not an image, or unreadable: nothing to draw on.
            print("skipping unreadable image:", image_path)
            continue
        start_time = time.time()
        detections = detect(net, meta, image_path.encode("utf-8"))
        each_process_time.append(time.time() - start_time)

        # Draw on ``image`` itself so an image without detections is still
        # saved, instead of reusing the previous image's annotated result.
        for name, _score, (x, y, w, h) in detections:
            xmin, ymin, xmax, ymax = convertBack(float(x), float(y), float(w), float(h))
            image = cv2.rectangle(
                image,
                (xmin, ymin),
                (xmax, ymax),
                (0, 255, 255),
                2
            )
            cv2.putText(
                image,
                name.decode(),
                (xmin, ymin),
                cv2.FONT_HERSHEY_SIMPLEX,
                1.0,
                (0, 0, 255),
                2
            )
        stem = os.path.splitext(os.path.basename(image_path))[0]
        cv2.imwrite(os.path.join(save_folder, stem + "-result.jpg"), image)

    print("Yolo-v3 COCO Average each Image processing Time:\n")
    if each_process_time:
        print(np.mean(each_process_time))
    else:
        # np.mean([]) would warn and yield nan.
        print("no images were processed")
def video_processing(processing_path="small.mp4", gpu_id=7):
    """Run detection on a video and write an annotated copy next to it.

    Args:
        processing_path: Input video path. The output is written to the
            same path with the extension replaced by ``-result.mp4``.
        gpu_id: Device index passed to ``set_gpu``. The default matches the
            previously hard-coded value.
    """
    set_gpu(gpu_id)
    net, meta = init()
    cam = cv2.VideoCapture(processing_path)
    if not cam.isOpened():
        print("cannot open video:", processing_path)
        cam.release()
        return
    total_frames = cam.get(cv2.CAP_PROP_FRAME_COUNT)
    fps = cam.get(cv2.CAP_PROP_FPS)
    frame_size = (int(cam.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    processing_result_name = os.path.splitext(processing_path)[0] + "-result.mp4"
    result = cv2.VideoWriter(processing_result_name, fourcc, fps, frame_size)
    # Run detection on every ``timeF``-th frame; other frames are copied
    # through unchanged.
    timeF = 1
    c = 1
    print("opencv?", cam.isOpened())
    print("fps:", fps)
    print("decode style:", fourcc)
    print("size:", frame_size)
    print("total frames:", total_frames)
    start_total = time.time()
    try:
        while True:
            frame_start = time.time()
            # Stop on the first failed read rather than trusting the frame
            # count reported by the container.
            ok, img = cam.read()
            if not ok or img is None:
                print("Finished Processing!")
                break
            if c % timeF == 0:
                for name, score, (x, y, w, h) in detect_im(net, meta, img):
                    xmin, ymin, xmax, ymax = convertBack(float(x), float(y), float(w), float(h))
                    img = cv2.rectangle(
                        img,
                        (xmin, ymin),
                        (xmax, ymax),
                        (0, 255, 255),
                        1
                    )
                    label_score = "{}:{:.2f}".format(name.decode(), score)
                    cv2.putText(
                        img,
                        label_score,
                        (xmin, ymin),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1.0,
                        (0, 0, 255),
                        1
                    )
            result.write(img)
            c += 1
            print("processing one frame total time:", (time.time() - frame_start))
            print()
        processing_time = time.time() - start_total
    finally:
        # Always finalise the output file and release the capture device.
        cam.release()
        result.release()
    # post_compression is not defined in this module; call it only when the
    # surrounding program provides it, instead of failing with NameError.
    compress = globals().get("post_compression")
    if callable(compress):
        compress(processing_result_name)
    print("Yolo-v3 COCO one Video Process Time:\n")
    print(processing_time)
if __name__ == "__main__":
    # Script entry point: runs the image-folder pipeline. Swap the two calls
    # at the bottom to process a video instead.
    # Legacy classification example (Python 2 syntax), kept for reference:
    #net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0)
    #im = load_image("data/wolf.jpg", 0, 0)
    #meta = load_meta("cfg/imagenet1k.data")
    #r = classify(net, meta, im)
    #print r[:10]
    # Single-image detection example:
    # net = load_net("../cfg/yolov3.cfg".encode("utf-8"), "../cfg/yolov3.weights".encode("utf-8"), 0)
    # meta = load_meta("../cfg/coco.data".encode("utf-8"))
    # start_time = time.time()
    # r = detect(net, meta, "../data/car.jpg".encode("utf-8"))
    # print("Inference time:{:.4f}".format(time.time() - start_time))
    # print(r)
    image_processing()
    # video_processing()
运行
./darknet detector demo cfg/coco.data cfg/yolov3.cfg cfg/yolov3.weights python/videos/test.mp4