1 说明:
=====
1.1 技术要点:yolov3+python+cv2(OpenCV)实现目标检测,本文由浅入深,代码注释,小白秒懂,值得收藏。
1.2 3种方法:摄像头视频目标检测、小视频cv2目标检测,和视频目标检测生成avi视频。
1.3 yolo的基本介绍。
1.4 单张静态图片目标检测,我已经讲过,可参考文章
《目标检测和识别:Python+OpenCV+Yolov3》,文章中三个文件:yolov3.cfg、coco.names和yolov3.weights.下载已经告知,本文省略。建议先看这篇文章后,再看本文。
2 效果图:
=======
2.1 摄像头实时目标检测
2.2 读取视频实时目标检测:
2.3 目标检测后生成avi视频:
3 摄像头实时目标检测代码:
# Source (annotated, analyzed and fixed):
# https://blog.csdn.net/qq_39567427/article/details/105451962
# Real-time object detection from a webcam with YOLOv3 + OpenCV DNN.

# Step 1: import modules
import cv2
import numpy as np

# Step 2: load the YOLOv3 model and its three support files (mind the paths)
net = cv2.dnn.readNet("/home/xgj/Desktop/yolov3/yolov3.weights",
                      "/home/xgj/Desktop/yolov3/yolov3.cfg")
# class-name list, one label per line of coco.names
classes = []
with open("/home/xgj/Desktop/yolov3/coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
# FIX: the original `layer_names[i[0] - 1]` indexing only works on OpenCV
# versions where getUnconnectedOutLayers() returns Nx1 arrays; since 4.5.4 it
# returns a flat array and i[0] raises. getUnconnectedOutLayersNames() is
# available on OpenCV 3.4+/4.x and sidesteps the indexing entirely.
output_layers = net.getUnconnectedOutLayersNames()

# Step 3: initialize runtime parameters
colors = np.random.uniform(0, 255, size=(len(classes), 3))  # one random color per class
frame_rate_calc = 1  # frame-rate shown on screen; updated every loop
freq = cv2.getTickFrequency()
# 0 opens the default webcam;
# pass a path like 'xxx/xxx/xxx.mp4' instead to read a local video file
cap = cv2.VideoCapture(0)

# Step 4: main capture/detect/display loop
while True:
    # start timer (for calculating frame rate)
    t1 = cv2.getTickCount()
    # grab one frame from the camera
    ret, frame = cap.read()
    if not ret:
        # FIX: the original used `frame` unconditionally and crashed with
        # 'NoneType has no attribute shape' when the camera read failed
        break
    # the rest mirrors single-static-image detection
    height, width, channels = frame.shape
    # build the network input blob and run a forward pass
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # collect detections above the confidence threshold
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # detection holds normalized center/size -> scale to pixels
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # rectangle (top-left) coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # non-maximum suppression keeps the best box per object
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_SIMPLEX
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            # FIX: color by class id — the original indexed `colors` by box
            # number, which can overrun the table when there are more boxes
            # than classes and gives the same object different colors
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 1)
            cv2.putText(frame, label, (x, y - 20), font, 0.7, color, 1)

    # overlay the frame rate and show the annotated frame
    cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50), font, 0.7, (255, 255, 0), 1)
    cv2.namedWindow("Image", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("Image", 960, 540)
    cv2.imshow("Image", frame)

    # calculate the frame rate for the next overlay
    t2 = cv2.getTickCount()
    time1 = (t2 - t1) / freq
    frame_rate_calc = 1 / time1

    # press 'q' to quit; waitKey(1) also services the GUI event loop
    if cv2.waitKey(1) == ord('q'):
        break

# Step 5: release the camera and close all windows
cap.release()
cv2.destroyAllWindows()
4 读取本地视频进行目标检测:
代码基本与上面相同,仅需改一下这一行代码为
cap = cv2.VideoCapture('/home/xgj/Desktop/yolov3/333.mp4') #上面注释里有介绍
5 高级一点的视频标定目标检测,并合成新的视频:
======================================
5.1 本机环境:python3.8+OpenCV4.2.0+华为笔记本电脑+deepin-linux操作系统。
5.2 代码说明:
#参考文章,对代码进行注释和适当修改#https://my.oschina.net/u/1464083/blog/2906860#本代码:sp-3.py
5.3 代码:
# Annotated and fixed; based on:
# https://my.oschina.net/u/1464083/blog/2906860
# This file: sp-3.py — run YOLOv3 on a video file and write an annotated AVI.

# Step 1: import modules
import numpy as np
import argparse
import imutils
import time
import cv2
import os

# Step 2: build the command-line arguments
ap = argparse.ArgumentParser()
# input video (e.g. under ./videos next to this script)
ap.add_argument("-i", "--input", required=True, help="path to input video")
# output video with the detection boxes drawn in (e.g. under ./output)
ap.add_argument("-o", "--output", required=True, help="path to output video")
# directory holding yolov3.weights / yolov3.cfg / coco.names
ap.add_argument("-y", "--yolov3", required=True, help="base path to YOLO directory")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="minimum probability to filter weak detections")
ap.add_argument("-t", "--threshold", type=float, default=0.3,
                help="threshold when applyong non-maxima suppression")
args = vars(ap.parse_args())

# Step 3: load the three YOLOv3 files from the --yolov3 directory
weightsPath = os.path.sep.join([args["yolov3"], "yolov3.weights"])
configPath = os.path.sep.join([args["yolov3"], "yolov3.cfg"])
labelsPath = os.path.sep.join([args["yolov3"], "coco.names"])
# FIX: the original split("") raises "ValueError: empty separator" at startup;
# coco.names is one label per line, so split on newlines.
LABELS = open(labelsPath).read().strip().split("\n")
# one reproducible random color per class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")
print("[INFO] loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
# FIX: `ln[i[0] - 1]` breaks on OpenCV >= 4.5.4 where getUnconnectedOutLayers()
# returns a flat array; getUnconnectedOutLayersNames() works on all versions.
ln = net.getUnconnectedOutLayersNames()

# Step 4: open the input video and try to determine its total frame count
vs = cv2.VideoCapture(args["input"])
writer = None  # created lazily once the first frame's size is known
(W, H) = (None, None)
try:
    prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() else cv2.CAP_PROP_FRAME_COUNT
    total = int(vs.get(prop))
    print("[INFO] {} total frames in video".format(total))
except Exception:
    # the frame count only feeds the progress estimate, so keep going
    print("[INFO] could not determine # of frames in video")
    print("[INFO] no approx. completion time can be provided")
    total = -1

# Step 5: loop over the frames of the video file stream
while True:
    # read the next frame; an un-grabbed frame means end of stream
    (grabbed, frame) = vs.read()
    if not grabbed:
        break
    # grab the frame dimensions once
    if W is None or H is None:
        (H, W) = frame.shape[:2]

    # forward pass through YOLO: blob in, bounding boxes + probabilities out
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    start = time.time()
    layerOutputs = net.forward(ln)
    end = time.time()

    # collect detected bounding boxes, confidences and class IDs
    boxes = []
    confidences = []
    classIDs = []
    for output in layerOutputs:
        for detection in output:
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]
            if confidence > args["confidence"]:
                # detection holds normalized center/size -> scale to pixels
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))
                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                classIDs.append(classID)

    # non-maximum suppression collapses overlapping boxes
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, args["confidence"], args["threshold"])
    # ensure at least one detection exists before drawing
    if len(idxs) > 0:
        for i in idxs.flatten():
            # extract the bounding box coordinates and draw box + label
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])
            color = [int(c) for c in COLORS[classIDs[i]]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
            cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # create the video writer on the first frame, when the size is known
    if writer is None:
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        writer = cv2.VideoWriter(args["output"], fourcc, 30,
                                 (frame.shape[1], frame.shape[0]), True)
        # one-time estimate of total processing time
        if total > 0:
            elap = (end - start)
            print("[INFO] single frame took {:.4f} seconds".format(elap))
            print("[INFO] estimated total time to finish: {:.4f} seconds".format(elap * total))

    # write the annotated frame to disk
    writer.write(frame)

# Step 6: release the file pointers
print("[INFO] cleaning up...")
# FIX: the original called writer.release() unconditionally and crashed with
# AttributeError when the input video yielded no frames (writer still None)
if writer is not None:
    writer.release()
vs.release()
5.4 运行:
#第7步:使用方法#在本代码sp-3.py的目录下打开终端#本机输入#python3.8 sp-3.py --input videos/222.mp4 --output output/2222.avi --yolov3 yolov3
5.5 图:
6 小结:
=====
6.1 摄像头视频,有点卡,小bug。
6.2 合成视频,有点慢,小bug。
7 yolo介绍:
=========
7.1 yolo是目前比较流行的目标检测算法,速度快结构简单。
7.2 YOLO 是 2016 年提出来的目标检测算法,当前较为火热。
7.3 yolo=You Only Look Once:
Unified, Real-Time Object Detection,
即:You Only Look Once说的是只需要一次CNN运算,
Unified指的是这是一个统一的框架,提供end-to-end的预测,
而Real-Time体现是Yolo算法速度快,达到实时。
7.4 yolov3比较成熟,但目前已经出现yolov4和yolov5(被认为是v4.5版本)。