In this part, building on the previous sections, lidar and camera data are used to detect and classify objects, track a single target, and compute the time-to-collision (TTC) with the vehicle directly ahead of the ego car. The TTC is computed two ways: once from the camera and once from the lidar.
1. Overall system architecture:
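The pipeline implemented in FinalProject_Camera.cpp (Section 3 below) runs the following steps for each frame, matching the #1-#8 markers logged by the code: load the image into a ring buffer -> detect and classify objects with YOLO -> crop the Lidar points down to the ego lane -> cluster the Lidar points into the camera bounding boxes -> detect image keypoints -> extract keypoint descriptors -> match descriptors against the previous frame -> match bounding boxes between the two frames -> compute the TTC for the tracked box in front, once from Lidar and once from the camera.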
2. TTC computation
A brief look at how the time-to-collision is computed from the lidar and from the camera:
Lidar-based TTC: first, the detection box is used to cluster the points belonging to the preceding vehicle, and outliers among them are filtered out; then the point closest to the ego vehicle is selected. From two consecutive frames and the data acquisition rate, the relative velocity can be computed, and dividing the current distance by that velocity gives the TTC. Picking a single closest point is not very stable, however; an improved variant that uses the mean distance instead is sketched after the code below.
The course uses a computation based on a constant-velocity assumption.
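Concretely, let d0 be the closest longitudinal distance in the previous frame, d1 the one in the current frame, and dT the time between the two measurements. Constant velocity gives

d1 = d0 - v * dT  =>  v = (d0 - d1) / dT
TTC = d1 / v = d1 * dT / (d0 - d1)

which is exactly the expression TTC = minXCurr * dT / (minXPrev - minXCurr) in the code below.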
Simple closest-point-based TTC code:
void computeTTCLidar(std::vector<LidarPoint> &lidarPointsPrev,
                     std::vector<LidarPoint> &lidarPointsCurr, double &TTC)
{
    // auxiliary variables
    double dT = 0.1; // time between two measurements in seconds

    // find closest distance to Lidar points
    double minXPrev = 1e9, minXCurr = 1e9;
    for (auto it = lidarPointsPrev.begin(); it != lidarPointsPrev.end(); ++it)
    {
        minXPrev = minXPrev > it->x ? it->x : minXPrev;
    }
    for (auto it = lidarPointsCurr.begin(); it != lidarPointsCurr.end(); ++it)
    {
        minXCurr = minXCurr > it->x ? it->x : minXCurr;
    }

    // compute TTC from both measurements
    TTC = minXCurr * dT / (minXPrev - minXCurr);
}
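The simple version above divides by (minXPrev - minXCurr), so a single outlier point or an unchanged distance can produce a wild or infinite TTC. The main file in Section 3 calls a five-argument overload, computeTTCLidar(prev, curr, frameRate, TTC, flag), which lives in the author's utils.cpp and is not listed in this post. Below is a minimal sketch of what such an overload might look like, assuming the flag switches from the closest point to the outlier-robust mean of the x-coordinates; treat the names and behavior as assumptions, not the author's actual implementation.

#include <algorithm>
#include <vector>
#include "dataStructures.h" // LidarPoint

// Hypothetical sketch, not the author's utils.cpp: useMean == false reproduces the
// closest-point estimate above; useMean == true averages x over all cluster points.
void computeTTCLidar(std::vector<LidarPoint> &lidarPointsPrev,
                     std::vector<LidarPoint> &lidarPointsCurr,
                     double frameRate, double &TTC, bool useMean)
{
    double dT = 1.0 / frameRate; // time between two measurements in seconds

    // estimate the longitudinal distance to the preceding vehicle
    auto estimateX = [useMean](const std::vector<LidarPoint> &pts) {
        if (useMean)
        {
            double sum = 0.0;
            for (const auto &p : pts)
                sum += p.x;
            return sum / pts.size(); // mean suppresses individual outlier points
        }
        double minX = 1e9;
        for (const auto &p : pts)
            minX = std::min(minX, p.x);
        return minX; // closest point, as in the simple version
    };

    double xPrev = estimateX(lidarPointsPrev);
    double xCurr = estimateX(lidarPointsCurr);

    // same constant-velocity formula as above
    TTC = xCurr * dT / (xPrev - xCurr);
}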
For the camera, only monocular data is available, so the TTC is computed from pixel distances between keypoints on the target, exploiting how the projection of the vehicle's texture scales from frame to frame. The accuracy of this approach is rather poor; treat the result as a reference.
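The idea behind the code below: in the pinhole model the projected size of an object at distance d scales with 1/d, so the ratio of pixel distances between pairs of keypoints on the target satisfies

distCurr / distPrev = dPrev / dCurr = distRatio

and substituting dCurr = dPrev - v * dT under the same constant-velocity assumption yields

TTC = dCurr / v = -dT / (1 - distRatio)

which is the final line of the code.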
Camera TTC from pixel distances (wrapped here in the computeTTCCamera signature used by the main file):
void computeTTCCamera(std::vector<cv::KeyPoint> &kptsPrev, std::vector<cv::KeyPoint> &kptsCurr,
                      std::vector<cv::DMatch> kptMatches, double frameRate, double &TTC)
{
    vector<double> distRatios; // stores the distance ratios for all keypoints between curr. and prev. frame
    for (auto it1 = kptMatches.begin(); it1 != kptMatches.end() - 1; ++it1)
    { // outer kpt. loop

        // get current keypoint and its matched partner in the prev. frame
        cv::KeyPoint kpOuterCurr = kptsCurr.at(it1->trainIdx);
        cv::KeyPoint kpOuterPrev = kptsPrev.at(it1->queryIdx);

        for (auto it2 = kptMatches.begin() + 1; it2 != kptMatches.end(); ++it2)
        { // inner kpt.-loop

            double minDist = 100.0; // min. required distance

            // get next keypoint and its matched partner in the prev. frame
            cv::KeyPoint kpInnerCurr = kptsCurr.at(it2->trainIdx);
            cv::KeyPoint kpInnerPrev = kptsPrev.at(it2->queryIdx);

            // compute distances and distance ratios
            double distCurr = cv::norm(kpOuterCurr.pt - kpInnerCurr.pt);
            double distPrev = cv::norm(kpOuterPrev.pt - kpInnerPrev.pt);

            if (distPrev > std::numeric_limits<double>::epsilon() && distCurr >= minDist)
            { // avoid division by zero
                double distRatio = distCurr / distPrev;
                distRatios.push_back(distRatio);
            }
        } // eof inner loop over all matched kpts
    } // eof outer loop over all matched kpts

    // only continue if list of distance ratios is not empty
    if (distRatios.size() == 0)
    {
        TTC = NAN;
        return;
    }

    // STUDENT TASK (replacement for meanDistRatio)
    // use the median distance ratio to remove the influence of outlier matches
    std::sort(distRatios.begin(), distRatios.end());
    long medIndex = floor(distRatios.size() / 2.0);
    double medDistRatio = distRatios.size() % 2 == 0 ? (distRatios[medIndex - 1] + distRatios[medIndex]) / 2.0 : distRatios[medIndex];

    double dT = 1 / frameRate;
    TTC = -dT / (1 - medDistRatio);
}
3. Project code structure
The main files are:
1. FinalProject_Camera.cpp is the main project file; it lays out the processing flow of the entire project.
2. camFusion_Student.cpp holds the core functions: point-cloud clustering, the TTC computations, bounding-box matching, and other key functionality.
3. utils.cpp contains helper functions for the TTC computation.
4. The remaining files cover the data-structure definitions, point-cloud handling, and image keypoint detection, description and matching; the assumed directory layout is sketched below.
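Based on the file names above, the #include list in the main file, and the note at the end of this post (dat, images and src extracted to the same level), the layout is presumably as follows (the exact file split is an assumption):

.
├── dat/yolo/        coco.names, yolov3.cfg, yolov3.weights
├── images/KITTI/    camera frames (image_02) and Velodyne scans (velodyne_points)
└── src/             FinalProject_Camera.cpp, camFusion_Student.cpp, utils.cpp,
                     dataStructures.h, lidarData, matching2D, objectDetection2D

Since dataPath is "../", the program is meant to be run from a subdirectory (e.g. a build folder) placed next to dat and images.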
FinalProject_Camera.cpp code:
/* INCLUDES FOR THIS PROJECT */
#include <iostream>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <vector>
#include <cmath>
#include <limits>
#include <opencv2/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/features2d.hpp>
//#include <opencv2/xfeatures2d.hpp>
//#include <opencv2/xfeatures2d/nonfree.hpp>
#include "dataStructures.h"
#include "matching2D.hpp"
#include "objectDetection2D.hpp"
#include "lidarData.hpp"
#include "camFusion.hpp"
using namespace std;
/* MAIN PROGRAM */
int main(int argc, const char *argv[])
{
/* INIT VARIABLES AND DATA STRUCTURES */
// data location (root folder of the image data)
string dataPath = "../";
// camera
string imgBasePath = dataPath + "images/";
string imgPrefix = "KITTI/2011_09_26/image_02/data/000000"; // left camera, color
string imgFileType = ".png";
int imgStartIndex = 0; // first file index to load (assumes Lidar and camera names have identical naming convention)
int imgEndIndex = 40; // last file index to load
int imgStepWidth = 1;
int imgFillWidth = 4; // no. of digits which make up the file index (e.g. img-0001.png)
// object detection (YOLO)
string yoloBasePath = dataPath + "dat/yolo/";
string yoloClassesFile = yoloBasePath + "coco.names";
string yoloModelConfiguration = yoloBasePath + "yolov3.cfg";
string yoloModelWeights = yoloBasePath + "yolov3.weights";
// Lidar
string lidarPrefix = "KITTI/2011_09_26/velodyne_points/data/000000";
string lidarFileType = ".bin";
// calibration data for camera and lidar
cv::Mat P_rect_00(3,4,cv::DataType<double>::type); // 3x4 projection matrix after rectification (camera intrinsics)
cv::Mat R_rect_00(4,4,cv::DataType<double>::type); // 3x3 rectifying rotation to make image planes co-planar, stored as 4x4 (camera-to-camera extrinsics)
cv::Mat RT(4,4,cv::DataType<double>::type); // rotation matrix and translation vector (lidar-to-camera extrinsics)
RT.at<double>(0,0) = 7.533745e-03; RT.at<double>(0,1) = -9.999714e-01; RT.at<double>(0,2) = -6.166020e-04; RT.at<double>(0,3) = -4.069766e-03;
RT.at<double>(1,0) = 1.480249e-02; RT.at<double>(1,1) = 7.280733e-04; RT.at<double>(1,2) = -9.998902e-01; RT.at<double>(1,3) = -7.631618e-02;
RT.at<double>(2,0) = 9.998621e-01; RT.at<double>(2,1) = 7.523790e-03; RT.at<double>(2,2) = 1.480755e-02; RT.at<double>(2,3) = -2.717806e-01;
RT.at<double>(3,0) = 0.0; RT.at<double>(3,1) = 0.0; RT.at<double>(3,2) = 0.0; RT.at<double>(3,3) = 1.0;
R_rect_00.at<double>(0,0) = 9.999239e-01; R_rect_00.at<double>(0,1) = 9.837760e-03; R_rect_00.at<double>(0,2) = -7.445048e-03; R_rect_00.at<double>(0,3) = 0.0;
R_rect_00.at<double>(1,0) = -9.869795e-03; R_rect_00.at<double>(1,1) = 9.999421e-01; R_rect_00.at<double>(1,2) = -4.278459e-03; R_rect_00.at<double>(1,3) = 0.0;
R_rect_00.at<double>(2,0) = 7.402527e-03; R_rect_00.at<double>(2,1) = 4.351614e-03; R_rect_00.at<double>(2,2) = 9.999631e-01; R_rect_00.at<double>(2,3) = 0.0;
R_rect_00.at<double>(3,0) = 0; R_rect_00.at<double>(3,1) = 0; R_rect_00.at<double>(3,2) = 0; R_rect_00.at<double>(3,3) = 1;
P_rect_00.at<double>(0,0) = 7.215377e+02; P_rect_00.at<double>(0,1) = 0.000000e+00; P_rect_00.at<double>(0,2) = 6.095593e+02; P_rect_00.at<double>(0,3) = 0.000000e+00;
P_rect_00.at<double>(1,0) = 0.000000e+00; P_rect_00.at<double>(1,1) = 7.215377e+02; P_rect_00.at<double>(1,2) = 1.728540e+02; P_rect_00.at<double>(1,3) = 0.000000e+00;
P_rect_00.at<double>(2,0) = 0.000000e+00; P_rect_00.at<double>(2,1) = 0.000000e+00; P_rect_00.at<double>(2,2) = 1.000000e+00; P_rect_00.at<double>(2,3) = 0.000000e+00;
// misc
double sensorFrameRate = 10.0 / imgStepWidth; // frames per second for Lidar and camera
int dataBufferSize = 2; // no. of images which are held in memory (ring buffer) at the same time
vector<DataFrame> dataBuffer; // list of data frames which are held in memory at the same time
bool bVis = false; // visualize results
/* MAIN LOOP OVER ALL IMAGES */
for (size_t imgIndex = 0; imgIndex <= imgEndIndex - imgStartIndex; imgIndex+=imgStepWidth)
{
/* LOAD IMAGE INTO BUFFER */
// assemble filenames for current index
ostringstream imgNumber;
imgNumber << setfill('0') << setw(imgFillWidth) << imgStartIndex + imgIndex; // zero-pad the index to imgFillWidth digits, e.g. "0000"
string imgFullFilename = imgBasePath + imgPrefix + imgNumber.str() + imgFileType; // e.g. images/KITTI/2011_09_26/image_02/data/0000000000.png
// load image from file
cv::Mat img = cv::imread(imgFullFilename);
// push image into data frame buffer
DataFrame frame;
frame.cameraImg = img;
dataBuffer.push_back(frame); // push each frame into the buffer; dataBuffer is a vector whose DataFrame elements hold the fields listed below
/*
// cv::Mat cameraImg; // camera image
// std::vector<cv::KeyPoint> keypoints; // 2D keypoints within camera image
// cv::Mat descriptors; // keypoint descriptors
// std::vector<cv::DMatch> kptMatches; // keypoint matches between previous and current frame
// std::vector<LidarPoint> lidarPoints;
// std::vector<BoundingBox> boundingBoxes; // ROI around detected objects in 2D image coordinates
// std::map<int,int> bbMatches; // bounding box matches between previous and current frame
*/
cout << "#1 : LOAD IMAGE INTO BUFFER done" << endl;
/* DETECT & CLASSIFY OBJECTS */
// object detection output: boundingBoxes
float confThreshold = 0.2; // confidence threshold
float nmsThreshold = 0.4; // non-maximum suppression threshold
// begin() points at the first element, end() one past the last, so (end() - 1) is the current frame
detectObjects((dataBuffer.end() - 1)->cameraImg, (dataBuffer.end() - 1)->boundingBoxes, confThreshold, nmsThreshold,
yoloBasePath, yoloClassesFile, yoloModelConfiguration, yoloModelWeights, bVis);
cout << "#2 : DETECT & CLASSIFY OBJECTS done" << endl;
/* CROP LIDAR POINTS */
// obtain the point cloud in the region of interest
// load 3D Lidar points from file
string lidarFullFilename = imgBasePath + lidarPrefix + imgNumber.str() + lidarFileType;
std::vector<LidarPoint> lidarPoints;
loadLidarFromFile(lidarPoints, lidarFullFilename);
// remove Lidar points based on distance properties
float minZ = -1.5, maxZ = -0.9, minX = 2.0, maxX = 20.0, maxY = 2.0, minR = 0.1; // focus on ego lane
cropLidarPoints(lidarPoints, minX, maxX, maxY, minZ, maxZ, minR);
(dataBuffer.end() - 1)->lidarPoints = lidarPoints;
cout << "#3 : CROP LIDAR POINTS done" << endl;
/* CLUSTER LIDAR POINT CLOUD */
// associate Lidar points with camera-based ROI
float shrinkFactor = 0.10; // shrinks each bounding box by the given percentage to avoid 3D object merging at the edges of an ROI
clusterLidarWithROI((dataBuffer.end()-1)->boundingBoxes, (dataBuffer.end() - 1)->lidarPoints, shrinkFactor, P_rect_00, R_rect_00, RT);
// Visualize 3D objects
bVis = false;
if(bVis)
{
show3DObjects((dataBuffer.end()-1)->boundingBoxes, cv::Size(4.0, 20.0), cv::Size(2000, 2000), true);
}
bVis = false;
cout << "#4 : CLUSTER LIDAR POINT CLOUD done" << endl;
// REMOVE THIS LINE BEFORE PROCEEDING WITH THE FINAL PROJECT
// continue; // skips directly to the next image without processing what comes beneath
/* DETECT IMAGE KEYPOINTS */
// convert current image to grayscale
cv::Mat imgGray;
cv::cvtColor((dataBuffer.end()-1)->cameraImg, imgGray, cv::COLOR_BGR2GRAY);
// extract 2D keypoints from current image
vector<cv::KeyPoint> keypoints; // create empty feature list for current image
string detectorType = "SHITOMASI";
if (detectorType.compare("SHITOMASI") == 0)
{
detKeypointsShiTomasi(keypoints, imgGray, false);
}
else
{
//...
}
// optional : limit number of keypoints (helpful for debugging and learning)
bool bLimitKpts = true;
if (bLimitKpts)
{
int maxKeypoints = 50;
if (detectorType.compare("SHITOMASI") == 0)
{ // there is no response info, so keep the first 50 as they are sorted in descending quality order
if ((int)keypoints.size() > maxKeypoints) // guard: erasing past the end would be undefined behavior
keypoints.erase(keypoints.begin() + maxKeypoints, keypoints.end());
}
cv::KeyPointsFilter::retainBest(keypoints, maxKeypoints);
cout << " NOTE: Keypoints have been limited!" << endl;
}
// push keypoints and descriptor for current frame to end of data buffer
(dataBuffer.end() - 1)->keypoints = keypoints;
cout << "#5 : DETECT KEYPOINTS done" << endl;
/* EXTRACT KEYPOINT DESCRIPTORS */
cv::Mat descriptors;
string descriptorType = "BRISK"; // BRISK, BRIEF, ORB, FREAK, AKAZE, SIFT
descKeypoints((dataBuffer.end() - 1)->keypoints, (dataBuffer.end() - 1)->cameraImg, descriptors, descriptorType);
// push descriptors for current frame to end of data buffer
(dataBuffer.end() - 1)->descriptors = descriptors;
cout << "#6 : EXTRACT DESCRIPTORS done" << endl;
if (dataBuffer.size() > 1) // wait until at least two images have been processed
{
/* MATCH KEYPOINT DESCRIPTORS */
vector<cv::DMatch> matches;
string matcherType = "MAT_BF"; // MAT_BF, MAT_FLANN
string descriptorType = "DES_BINARY"; // DES_BINARY, DES_HOG
string selectorType = "SEL_NN"; // SEL_NN, SEL_KNN
matchDescriptors((dataBuffer.end() - 2)->keypoints, (dataBuffer.end() - 1)->keypoints,
(dataBuffer.end() - 2)->descriptors, (dataBuffer.end() - 1)->descriptors,
matches, descriptorType, matcherType, selectorType);
// store matches in current data frame
(dataBuffer.end() - 1)->kptMatches = matches; // store the keypoint match pairs in the current (later) frame
cout << "#7 : MATCH KEYPOINT DESCRIPTORS done" << endl;
/* TRACK 3D OBJECT BOUNDING BOXES */
//// STUDENT ASSIGNMENT
//// TASK FP.1 -> match list of 3D objects (vector<BoundingBox>) between current and previous frame (implement ->matchBoundingBoxes)
map<int, int> bbBestMatches;
matchBoundingBoxes(matches, bbBestMatches, *(dataBuffer.end()-2), *(dataBuffer.end()-1)); // associate bounding boxes between current and previous frame using keypoint matches
//// EOF STUDENT ASSIGNMENT
// store matches in current data frame
(dataBuffer.end()-1)->bbMatches = bbBestMatches;
cout << "#8 : TRACK 3D OBJECT BOUNDING BOXES done" << endl;
/* COMPUTE TTC ON OBJECT IN FRONT */
// loop over all BB match pairs
for (auto it1 = (dataBuffer.end() - 1)->bbMatches.begin(); it1 != (dataBuffer.end() - 1)->bbMatches.end(); ++it1)
{
// find bounding boxes associated with current match
BoundingBox *prevBB = nullptr, *currBB = nullptr; // initialize so unmatched boxes can be detected below
for (auto it2 = (dataBuffer.end() - 1)->boundingBoxes.begin(); it2 != (dataBuffer.end() - 1)->boundingBoxes.end(); ++it2)
{
if (it1->second == it2->boxID) // check whether current match partner corresponds to this BB
{
currBB = &(*it2);
}
}
for (auto it2 = (dataBuffer.end() - 2)->boundingBoxes.begin(); it2 != (dataBuffer.end() - 2)->boundingBoxes.end(); ++it2)
{
if (it1->first == it2->boxID) // check whether current match partner corresponds to this BB
{
prevBB = &(*it2);
}
}
// compute TTC for current match
if( currBB && prevBB && currBB->lidarPoints.size()>0 && prevBB->lidarPoints.size()>0 ) // only compute TTC if both boxes were found and contain Lidar points
{
//// STUDENT ASSIGNMENT
//// TASK FP.2 -> compute time-to-collision based on Lidar data (implement -> computeTTCLidar)
double ttcLidar;
double ttcLidar1;
computeTTCLidar(prevBB->lidarPoints, currBB->lidarPoints, sensorFrameRate, ttcLidar, false);
computeTTCLidar(prevBB->lidarPoints, currBB->lidarPoints, sensorFrameRate, ttcLidar1, true);
//// EOF STUDENT ASSIGNMENT
//// STUDENT ASSIGNMENT
//// TASK FP.3 -> assign enclosed keypoint matches to bounding box (implement -> clusterKptMatchesWithROI)
//// TASK FP.4 -> compute time-to-collision based on camera (implement -> computeTTCCamera)
double ttcCamera;
clusterKptMatchesWithROI(*currBB, (dataBuffer.end() - 2)->keypoints, (dataBuffer.end() - 1)->keypoints, (dataBuffer.end() - 1)->kptMatches);
computeTTCCamera((dataBuffer.end() - 2)->keypoints, (dataBuffer.end() - 1)->keypoints, currBB->kptMatches, sensorFrameRate, ttcCamera);
//// EOF STUDENT ASSIGNMENT
bVis = true;
if (bVis)
{
cv::Mat visImg = (dataBuffer.end() - 1)->cameraImg.clone();
showLidarImgOverlay(visImg, currBB->lidarPoints, P_rect_00, R_rect_00, RT, &visImg);
cv::rectangle(visImg, cv::Point(currBB->roi.x, currBB->roi.y), cv::Point(currBB->roi.x + currBB->roi.width, currBB->roi.y + currBB->roi.height), cv::Scalar(0, 255, 0), 2);
char str[200];
sprintf(str, "TTC Lidar : %f s, TTC Camera : %f s, TTC1 Lidar : %f s", ttcLidar, ttcCamera, ttcLidar1);
// sprintf(str, "TTC Lidar : %f s, TTC1 Lidar : %f s", ttcLidar, ttcLidar1);
putText(visImg, str, cv::Point2f(80, 50), cv::FONT_HERSHEY_PLAIN, 2, cv::Scalar(0,0,255));
string windowName = "Final Results : TTC";
cv::namedWindow(windowName, 4);
cv::imshow(windowName, visImg);
cout << "Press key to continue to next frame" << endl;
cv::waitKey(0);
}
bVis = false;
} // eof TTC computation
} // eof loop over all BB matches
}
} // eof loop over all images
return 0;
}
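matchBoundingBoxes (TASK FP.1), clusterKptMatchesWithROI and the TTC functions above live in camFusion_Student.cpp, which is not listed in this post. For orientation, here is a minimal sketch of the usual approach to matchBoundingBoxes, assuming only the DataFrame fields shown in the commented struct earlier (keypoints, and boundingBoxes with boxID and roi); this is a sketch of the common course solution, not necessarily the author's code:

#include <map>
#include <vector>
#include <opencv2/core.hpp>
#include "dataStructures.h" // DataFrame, BoundingBox

// Hypothetical sketch of matchBoundingBoxes: for every previous-frame box, pick the
// current-frame box that shares the largest number of keypoint matches with it.
void matchBoundingBoxes(std::vector<cv::DMatch> &matches, std::map<int, int> &bbBestMatches,
                        DataFrame &prevFrame, DataFrame &currFrame)
{
    // counts[prevBoxID][currBoxID] = number of keypoint matches falling into both boxes
    std::map<int, std::map<int, int>> counts;

    for (const auto &match : matches)
    {
        const cv::Point2f &prevPt = prevFrame.keypoints[match.queryIdx].pt;
        const cv::Point2f &currPt = currFrame.keypoints[match.trainIdx].pt;

        for (const auto &prevBox : prevFrame.boundingBoxes)
        {
            if (!prevBox.roi.contains(prevPt))
                continue;
            for (const auto &currBox : currFrame.boundingBoxes)
            {
                if (currBox.roi.contains(currPt))
                    counts[prevBox.boxID][currBox.boxID]++;
            }
        }
    }

    // keep the best-supported current box for every previous box
    for (const auto &entry : counts)
    {
        int bestCurrID = -1, bestCount = 0;
        for (const auto &cand : entry.second)
        {
            if (cand.second > bestCount)
            {
                bestCount = cand.second;
                bestCurrID = cand.first;
            }
        }
        if (bestCurrID >= 0)
            bbBestMatches[entry.first] = bestCurrID;
    }
}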
Note: this code series is a study log of the Udacity Sensor Fusion course; questions and discussion are welcome.
Code download: https://github.com/jhzhang19/SFND_3D_detect
Dataset download: https://download.csdn.net/download/qq_36814762/13120828
After downloading and extracting the dataset, place the dat, images and src folders in the same directory.