The usual reminder: if you are starting from zero and have not read the previous four posts, please go through the links below one by one. If you already have the basics, or already understand stereo calibration and rectification, you can read this post directly. Here I will explain how to use triangulation for depth estimation (theory) and how to do face detection with mediapipe. If you want to fast-forward, just copy the code, run it once, and come back for whatever is unclear. Thanks in advance for the likes that keep this lazy author writing :)
Stereo Vision Calibration - 2. Monocular camera calibration with OpenCV, code in practice - CSDN Blog
Stereo Vision Calibration - 3. The theory of Stereo Calibration - CSDN Blog
Stereo Vision Calibration - 4. An OpenCV code primer on stereo calibration and rectification - CSDN Blog
1. What is Disparity
Let's start from the ideal model: the left and right cameras sit on the same horizontal line and have identical parameters, and we assume both have already been calibrated and rectified, as shown in the figure. Notice that even though the two cameras photograph the same object, it lands at different positions in the two images. This difference in pixel position between the two views is called the disparity.
So from the positions of a point P on the two image planes we can work backwards to its real-world coordinates and its depth. The formula is as follows:
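Here is the standard pinhole-model derivation behind that claim, written with the same symbols the code below uses (B is the baseline between the cameras, f the focal length in pixels, d the disparity, and X, Z the horizontal and depth coordinates of P in the left camera's frame):

\[ x_{\text{left}} = f\,\frac{X}{Z}, \qquad x_{\text{right}} = f\,\frac{X - B}{Z} \]

\[ d = x_{\text{left}} - x_{\text{right}} = \frac{f\,B}{Z} \quad\Longrightarrow\quad Z = \frac{f\,B}{d} \]

Note that depth is inversely proportional to disparity: nearby objects shift a lot between the two views, while far objects barely move at all.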
2. Computing Depth from Disparity
Watch out for unit consistency! The point coordinates we obtain are in pixels, so we also need to convert the focal length f into pixels. Look at the picture below: the angle alpha is the camera's horizontal field of view, which is usually provided in the camera's specifications. np.tan works in radians, so we additionally convert degrees with np.pi/180.
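As a quick sanity check, here is what that conversion gives for the numbers used later in this post (640 px frame width, alpha = 56.6°); this is just the f_pixel line from find_depth pulled out on its own:

import numpy as np

width = 640   # frame width [pixels]
alpha = 56.6  # horizontal field of view [degrees]

# f_pixel = (width / 2) / tan(alpha / 2), with alpha converted to radians
f_pixel = (width * 0.5) / np.tan(alpha * 0.5 * np.pi / 180)
print(f_pixel)  # ~594 pixels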
triangulation.py
import numpy as np


def find_depth(right_point, left_point, frame_right, frame_left, baseline, f, alpha):
    # CONVERT FOCAL LENGTH f FROM [mm] TO [pixel]:
    # (f_pixel is derived from the horizontal FOV alpha, so the f argument
    # in [mm] is not actually used in the computation)
    height_right, width_right, depth_right = frame_right.shape
    height_left, width_left, depth_left = frame_left.shape

    if width_right == width_left:
        f_pixel = (width_right * 0.5) / np.tan(alpha * 0.5 * np.pi / 180)
    else:
        raise ValueError('Left and right camera frames do not have the same pixel width')

    x_right = right_point[0]
    x_left = left_point[0]

    # CALCULATE THE DISPARITY:
    disparity = x_left - x_right  # Displacement between left and right frames [pixels]

    # CALCULATE DEPTH z:
    zDepth = (baseline * f_pixel) / disparity  # Depth in the same unit as the baseline

    return zDepth
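A minimal usage sketch for find_depth, with made-up matched pixel coordinates and empty dummy frames (only the frame width is actually read); B = 9 cm, f = 8 mm and alpha = 56.6° are the values used later in stereo_vision.py:

import numpy as np
from triangulation import find_depth

# Dummy 480x640 frames; find_depth only uses their width
frame_left = np.zeros((480, 640, 3), dtype=np.uint8)
frame_right = np.zeros((480, 640, 3), dtype=np.uint8)

# Hypothetical pixel coordinates of the same point seen by both cameras
left_point = (360, 240)
right_point = (330, 240)  # 30 px of disparity

depth = find_depth(right_point, left_point, frame_right, frame_left,
                   baseline=9, f=8, alpha=56.6)
print(round(depth, 1))  # ~178.3, in the same unit as the baseline [cm]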
3. Object Detection and Depth Calculation
Next comes the overall pipeline for detecting faces with mediapipe and estimating their depth. First we calibrate and rectify the stereo pair; this part is exactly the same as in the previous post, so for step1, step2 and step3 I will only paste the code without further explanation.
step1 Capture the chessboard images
calibration_images.py
# Take the images of the chessboard and save them
import cv2

cap = cv2.VideoCapture(0)
cap2 = cv2.VideoCapture(2)

num = 0

while cap.isOpened():
    succes1, img = cap.read()
    succes2, img2 = cap2.read()

    k = cv2.waitKey(5)
    if k == 27:  # ESC key to exit
        break
    elif k == ord('s'):  # wait for 's' key to save
        cv2.imwrite('images/stereoLeft/imageL' + str(num) + '.png', img)
        cv2.imwrite('images/stereoRight/imageR' + str(num) + '.png', img2)
        print("images saved!")
        num += 1

    cv2.imshow('Img 1', img)
    cv2.imshow('Img 2', img2)

cap.release()
cap2.release()
cv2.destroyAllWindows()
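One pitfall worth flagging: cv2.imwrite fails silently when the target folder does not exist, so nothing gets saved. A small sketch to create the two folders the script assumes, run once before capturing:

import os

# Create the output folders used by calibration_images.py if they are missing
for folder in ('images/stereoLeft', 'images/stereoRight'):
    os.makedirs(folder, exist_ok=True)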
step2 Obtain the intrinsic matrices of both cameras
stereo_calibration.py
import numpy as np
import cv2 as cv
import glob

################ FIND CHESSBOARD CORNERS - OBJECT POINTS AND IMAGE POINTS #############################

chessboardSize = (9,6)
frameSize = (640,480)

# Set the standard for iteration (number + precision)
# termination criteria
criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 0.001)

# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(8,5,0)
objp = np.zeros((chessboardSize[0] * chessboardSize[1], 3), np.float32)
# Give the coordinate in 2D to the 3D coordinate
objp[:,:2] = np.mgrid[0:chessboardSize[0],0:chessboardSize[1]].T.reshape(-1,2)

# Arrays to store object points and image points from all the images.
objpoints = [] # 3d point in real world space
imgpointsL = [] # 2d points in image plane.
imgpointsR = [] # 2d points in image plane.

# sort the file lists so the left and right images are paired up correctly
imagesLeft = sorted(glob.glob('images/stereoLeft/*.png'))
imagesRight = sorted(glob.glob('images/stereoRight/*.png'))

for imgLeft, imgRight in zip(imagesLeft, imagesRight):
    imgL = cv.imread(imgLeft)
    imgR = cv.imread(imgRight)
    grayL = cv.cvtColor(imgL, cv.COLOR_BGR2GRAY)
    grayR = cv.cvtColor(imgR, cv.COLOR_BGR2GRAY)

    # Find the chess board corners
    retL, cornersL = cv.findChessboardCorners(grayL, chessboardSize, None)
    retR, cornersR = cv.findChessboardCorners(grayR, chessboardSize, None)
    # retL and retR are booleans: True if the corners were found
    # coordinates of the corners are saved in cornersL and cornersR

    # If found, add object points, image points (after refining them)
    if retL and retR:
        objpoints.append(objp)

        # Level up precision
        cornersL = cv.cornerSubPix(grayL, cornersL, (11,11), (-1,-1), criteria)
        imgpointsL.append(cornersL)

        cornersR = cv.cornerSubPix(grayR, cornersR, (11,11), (-1,-1), criteria)
        imgpointsR.append(cornersR)

        # Draw and display the corners
        cv.drawChessboardCorners(imgL, chessboardSize, cornersL, retL)
        cv.imshow('img left', imgL)
        cv.drawChessboardCorners(imgR, chessboardSize, cornersR, retR)
        cv.imshow('img right', imgR)
        cv.waitKey(1000)

cv.destroyAllWindows()

############## CALIBRATION #######################################################

# boolean; camera intrinsic matrix; distortion; rotation; translation
retL, cameraMatrixL, distL, rvecsL, tvecsL = cv.calibrateCamera(objpoints, imgpointsL, frameSize, None, None)
heightL, widthL, channelsL = imgL.shape
newCameraMatrixL, roi_L = cv.getOptimalNewCameraMatrix(cameraMatrixL, distL, (widthL, heightL), 1, (widthL, heightL))

retR, cameraMatrixR, distR, rvecsR, tvecsR = cv.calibrateCamera(objpoints, imgpointsR, frameSize, None, None)
heightR, widthR, channelsR = imgR.shape
newCameraMatrixR, roi_R = cv.getOptimalNewCameraMatrix(cameraMatrixR, distR, (widthR, heightR), 1, (widthR, heightR))

########## Stereo Vision Calibration #############################################

flags = 0
flags |= cv.CALIB_FIX_INTRINSIC
# Here we fix the intrinsic camera matrices so that only Rot, Trns, Emat and Fmat are calculated.
# Hence intrinsic parameters are the same

criteria_stereo = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 0.001)

# This step finds the transformation between the two cameras and calculates the Essential and Fundamental matrices
# (flags and criteria are passed as keyword arguments so they land on the right parameters)
retStereo, newCameraMatrixL, distL, newCameraMatrixR, distR, rot, trans, essentialMatrix, fundamentalMatrix = cv.stereoCalibrate(objpoints, imgpointsL, imgpointsR, newCameraMatrixL, distL, newCameraMatrixR, distR, grayL.shape[::-1], criteria=criteria_stereo, flags=flags)

#print(newCameraMatrixL)
#print(newCameraMatrixR)

########## Stereo Rectification #################################################

rectifyScale = 1  # alpha=1 keeps all source pixels in the rectified images
rectL, rectR, projMatrixL, projMatrixR, Q, roi_L, roi_R = cv.stereoRectify(newCameraMatrixL, distL, newCameraMatrixR, distR, grayL.shape[::-1], rot, trans, alpha=rectifyScale, newImageSize=(0,0))

stereoMapL = cv.initUndistortRectifyMap(newCameraMatrixL, distL, rectL, projMatrixL, grayL.shape[::-1], cv.CV_16SC2)
stereoMapR = cv.initUndistortRectifyMap(newCameraMatrixR, distR, rectR, projMatrixR, grayR.shape[::-1], cv.CV_16SC2)

print("Saving parameters!")
cv_file = cv.FileStorage('stereoMap.xml', cv.FILE_STORAGE_WRITE)

cv_file.write('stereoMapL_x', stereoMapL[0])
cv_file.write('stereoMapL_y', stereoMapL[1])
cv_file.write('stereoMapR_x', stereoMapR[0])
cv_file.write('stereoMapR_y', stereoMapR[1])

cv_file.release()
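Before trusting the saved maps, it is worth checking the calibration quality. A sketch of the usual reprojection-error check for the left camera, following the pattern from the OpenCV calibration tutorial; it assumes it runs in the same script, after cv.calibrateCamera:

# Project the 3D chessboard points back with the estimated parameters and
# compare against the detected corners; values well below 1 px indicate a good fit
mean_error = 0
for i in range(len(objpoints)):
    imgpoints2, _ = cv.projectPoints(objpoints[i], rvecsL[i], tvecsL[i], cameraMatrixL, distL)
    error = cv.norm(imgpointsL[i], imgpoints2, cv.NORM_L2) / len(imgpoints2)
    mean_error += error
print("Left camera reprojection error:", mean_error / len(objpoints))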
step3 The undistortion and rectification function
calibration.py
import cv2

# Camera parameters to undistort and rectify images
cv_file = cv2.FileStorage()
cv_file.open('stereoMap.xml', cv2.FILE_STORAGE_READ)

stereoMapL_x = cv_file.getNode('stereoMapL_x').mat()
stereoMapL_y = cv_file.getNode('stereoMapL_y').mat()
stereoMapR_x = cv_file.getNode('stereoMapR_x').mat()
stereoMapR_y = cv_file.getNode('stereoMapR_y').mat()


def undistortRectify(frameR, frameL):
    # Undistort and rectify images
    undistortedL = cv2.remap(frameL, stereoMapL_x, stereoMapL_y, cv2.INTER_LANCZOS4, cv2.BORDER_CONSTANT, 0)
    undistortedR = cv2.remap(frameR, stereoMapR_x, stereoMapR_y, cv2.INTER_LANCZOS4, cv2.BORDER_CONSTANT, 0)

    return undistortedR, undistortedL
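A quick way to verify the rectification: load one saved image pair, run it through undistortRectify, and draw a few horizontal lines. After a correct rectification, corresponding points should sit on the same image row. A minimal sketch (the image paths are assumptions; point them at your own files):

import cv2
from calibration import undistortRectify

# Hypothetical test pair saved by calibration_images.py
frameL = cv2.imread('images/stereoLeft/imageL0.png')
frameR = cv2.imread('images/stereoRight/imageR0.png')

undistR, undistL = undistortRectify(frameR, frameL)

# Horizontal lines every 40 px: matching features should line up row by row
for y in range(0, undistL.shape[0], 40):
    cv2.line(undistL, (0, y), (undistL.shape[1], y), (0, 255, 0), 1)
    cv2.line(undistR, (0, y), (undistR.shape[1], y), (0, 255, 0), 1)

cv2.imshow('rectified left', undistL)
cv2.imshow('rectified right', undistR)
cv2.waitKey(0)
cv2.destroyAllWindows()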
step4 The depth-from-disparity function
triangulation.py is the same file already listed in full in Section 2 above, so it is not repeated here.
step5 Main loop: object detection and depth estimation
We use mediapipe for the detection. If the confidence is above 0.7, the detection is treated as a face and a box is drawn around it. We take the center point of the box in the left camera and in the right camera, then run the triangulation routine above to estimate the depth.
stereo_vision.py
import cv2
import time

# Mediapipe for face detection
import mediapipe as mp

# Functions defined in the previous steps (calibration.py and triangulation.py)
import calibration
import triangulation as tri

mp_facedetector = mp.solutions.face_detection
mp_draw = mp.solutions.drawing_utils

# Open both cameras
cap_right = cv2.VideoCapture(2, cv2.CAP_DSHOW)
cap_left = cv2.VideoCapture(0, cv2.CAP_DSHOW)

# Stereo vision setup parameters
frame_rate = 120    # Camera frame rate (maximum at 120 fps)
B = 9               # Distance between the cameras [cm]
f = 8               # Camera lens's focal length [mm]
alpha = 56.6        # Camera field of view in the horizontal plane [degrees]

# Main program loop with face detector and depth estimation using stereo vision
with mp_facedetector.FaceDetection(min_detection_confidence=0.7) as face_detection:

    while(cap_right.isOpened() and cap_left.isOpened()):

        succes_right, frame_right = cap_right.read()
        succes_left, frame_left = cap_left.read()

        # If we cannot catch any frame, break (checked before remapping)
        if not succes_right or not succes_left:
            break

        ################## CALIBRATION #########################################################
        frame_right, frame_left = calibration.undistortRectify(frame_right, frame_left)
        ########################################################################################

        start = time.time()

        # Convert the BGR image to RGB
        frame_right = cv2.cvtColor(frame_right, cv2.COLOR_BGR2RGB)
        frame_left = cv2.cvtColor(frame_left, cv2.COLOR_BGR2RGB)

        # Process the image and find faces
        results_right = face_detection.process(frame_right)
        results_left = face_detection.process(frame_left)

        # Convert the RGB image back to BGR
        frame_right = cv2.cvtColor(frame_right, cv2.COLOR_RGB2BGR)
        frame_left = cv2.cvtColor(frame_left, cv2.COLOR_RGB2BGR)

        ################## CALCULATING DEPTH #########################################################

        if results_right.detections:
            for id, detection in enumerate(results_right.detections):
                mp_draw.draw_detection(frame_right, detection)

                bBox = detection.location_data.relative_bounding_box
                h, w, c = frame_right.shape
                boundBox = int(bBox.xmin * w), int(bBox.ymin * h), int(bBox.width * w), int(bBox.height * h)

                center_point_right = (boundBox[0] + boundBox[2] / 2, boundBox[1] + boundBox[3] / 2)

                cv2.putText(frame_right, f'{int(detection.score[0]*100)}%', (boundBox[0], boundBox[1] - 20), cv2.FONT_HERSHEY_SIMPLEX, 2, (0,255,0), 2)

        if results_left.detections:
            for id, detection in enumerate(results_left.detections):
                mp_draw.draw_detection(frame_left, detection)

                bBox = detection.location_data.relative_bounding_box
                h, w, c = frame_left.shape
                boundBox = int(bBox.xmin * w), int(bBox.ymin * h), int(bBox.width * w), int(bBox.height * h)

                center_point_left = (boundBox[0] + boundBox[2] / 2, boundBox[1] + boundBox[3] / 2)

                cv2.putText(frame_left, f'{int(detection.score[0]*100)}%', (boundBox[0], boundBox[1] - 20), cv2.FONT_HERSHEY_SIMPLEX, 2, (0,255,0), 2)

        # If the face is lost in either camera, show the text "TRACKING LOST"
        if not results_right.detections or not results_left.detections:
            cv2.putText(frame_right, "TRACKING LOST", (75,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)
            cv2.putText(frame_left, "TRACKING LOST", (75,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)
        else:
            # Function to calculate depth of object. Outputs a vector of all depths in case of several faces.
            # All formulas used to find depth are in the video presentation
            depth = tri.find_depth(center_point_right, center_point_left, frame_right, frame_left, B, f, alpha)

            cv2.putText(frame_right, "Distance: " + str(round(depth,1)), (50,50), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0,255,0), 3)
            cv2.putText(frame_left, "Distance: " + str(round(depth,1)), (50,50), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0,255,0), 3)
            # Multiply the computed value by 205.8 to get the real-life depth in [cm]. The factor was found manually.
            print("Depth: ", str(round(depth,1)))

        end = time.time()
        totalTime = end - start
        fps = 1 / totalTime
        #print("FPS: ", fps)

        cv2.putText(frame_right, f'FPS: {int(fps)}', (20,450), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0,255,0), 2)
        cv2.putText(frame_left, f'FPS: {int(fps)}', (20,450), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0,255,0), 2)

        # Show the frames
        cv2.imshow("frame right", frame_right)
        cv2.imshow("frame left", frame_left)

        # Hit "q" to close the window
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

# Release and destroy all windows before termination
cap_right.release()
cap_left.release()

cv2.destroyAllWindows()
You can run the whole pipeline yourself (calibration_images.py → stereo_calibration.py → stereo_vision.py, with triangulation.py and calibration.py in the same folder) and you should end up with a result like this:
4. Summary
And with that, this series is complete, time for confetti! I will probably keep revising these posts into their best versions over the near term, and after that I'll move on to other things; this lazy author will keep ferrying good content your way :)