When you first get the code, start by looking at its overall structure. The source tree is split into three major parts:
- glow
- rangenet_lib
- semantic_suma
Of these, rangenet_lib contains the semantics-related code, while semantic_suma is the core and deserves the closest analysis.
Workflow overview
Usually, running a C++ project like this means running a (ROS-style) launch file that names the nodes to start and their corresponding sources. SuMa++, however, is started by simply running ./visualizer, then opening the point-cloud file in the visualization window and playing it back. So our initial entry point goes straight into the visualizer folder.
1. Opening a point-cloud file
VisualizerWindow.cpp 43
// The trigger wired to this button: a click runs showOpenFileDialog()
connect(ui_.actionOpenLaserscan, SIGNAL(triggered()), this, SLOT(showOpenFileDialog()));
VisualizerWindow.cpp 202-211
void VisualizerWindow::showOpenFileDialog() {
play(false); // stop playback.
/*
*First argument this: the parent is the current widget, so the dialog pops up as a child of the current window
*Second argument "Select laserscan file": sets the dialog title to "Select laserscan file"
*Third argument lastDirectory_: the directory the dialog opens in by default, taken from lastDirectory_
*Fourth argument: the filter restricting which file formats are shown
*/
QString retValue =
QFileDialog::getOpenFileName(this, "Select laserscan file", lastDirectory_,
"Laserscan files(*.bin *.pcap *.sim);;KITTI laserscans (*.bin);; PCAP "
"laserscans (*.pcap);; Simulation (*.sim)");
// if the path is non-empty, open the file
if (!retValue.isNull()) openFile(retValue);
}
VisualizerWindow::openFile()
void VisualizerWindow::openFile(const QString& filename) {
play(false); // stop playback.
// set the window title
if (!filename.isNull()) {
QString title = "Semantic SuMa";
title.append(" - ");
QStringList parts = filename.split("/");
title.append(QString::fromStdString(FileUtil::dirName(filename.toStdString())));
setWindowTitle(title);
std::shared_ptr<Model> model;
ui_.wCanvas->setRobotModel(model);
// the default extension is INVALID
std::string extension = "INVALID";
QFileInfo file_info(filename);
// if it is a directory, treat the most frequent file suffix as the extension
if (file_info.isDir()) {
QDir directory(filename);
QStringList files = directory.entryList(QDir::NoFilter, QDir::Name);
std::map<std::string, int32_t> suffix_count;
for (int32_t i = 0; i < files.size(); ++i) {
QFileInfo info(directory.filePath(files[i]));
if (!info.isFile()) continue;
// Count files per suffix in the map suffix_count; .suffix() returns the trailing file extension, toStdString() converts it to std::string
std::string ext = info.suffix().toStdString();
if (suffix_count.find(ext) == suffix_count.end()) suffix_count[ext] = 0;
suffix_count[ext] += 1;
}
if (suffix_count.size() > 0) {
int32_t maxCount = suffix_count.begin()->second;
extension = std::string(".") + suffix_count.begin()->first;
for (auto& entry : suffix_count) {
if (entry.second > maxCount) {
extension = std::string(".") + entry.first;
maxCount = entry.second;
}
}
std::cout << "Heuristically found '" << extension << "' as extension." << std::endl;
}
// if it is a single file, use its suffix as the extension
} else if (file_info.isFile()) {
extension = std::string(".") + file_info.suffix().toStdString();
}
if (extension == ".bin") {
delete reader_;
// KITTIReader is arguably the most important piece here: it reads the KITTI data and attaches the semantic information
// Inside it, initScanFilenames(scan_filename); filters out files that are not in ".bin" format
// and network = std::unique_ptr<Net>(new NetTensorRT(path)); loads the pretrained model with TensorRT
KITTIReader* reader_tmp = new KITTIReader(filename.toStdString(), params_, 50);
ui_.wCanvas->setColorMap(reader_tmp->getColorMap()); // get color map from rangenet
fusion_->setColorMap(reader_tmp->getColorMap());
reader_ = reader_tmp;
scanFrequency = 10.0f; // in Hz.
timer_.setInterval(10); // 1./10. second = 100 msecs.
if (fusion_ != nullptr) fusion_->reset();
ui_.wCanvas->reset();
uint32_t N = reader_->count();
precomputed_.resize(N);
oldPoses_.resize(N);
for (uint32_t i = 0; i < N; ++i) {
precomputed_[i] = false;
}
if (oldPoses_.size() > 0) oldPoses_[0] = Eigen::Matrix4f::Identity();
calib_.clear();
groundtruth_.clear();
// read the calibration file
if (parts.size() > 2) {
std::string calibration_file;
for (int32_t i = 0; i < parts.size() - 2; ++i) {
calibration_file += parts[i].toStdString();
calibration_file += "/";
}
calibration_file += "calib.txt";
std::cout << "calibration filename: " << calibration_file << "..." << std::flush;
if (rv::FileUtil::exists(calibration_file)) {
calib_.initialize(calibration_file);
std::cout << "loaded." << std::endl;
ui_.wCanvas->setCalibration(calib_);
fusion_->setCalibration(calib_);
} else {
std::cout << "not found." << std::endl;
}
}
// If a calibration matrix is available, the ground-truth poses are transformed by it when loaded;
// otherwise the ground-truth poses are stored as-is
if (parts.size() > 3 && calib_.exists("Tr")) {
std::string sequence = parts[parts.size() - 3].toStdString();
Eigen::Matrix4f Tr = calib_["Tr"];
Eigen::Matrix4f Tr_inv = Tr.inverse();
// find ground truth poses.
std::string poses_file;
for (int32_t i = 0; i < parts.size() - 4; ++i) {
poses_file += parts[i].toStdString();
poses_file += "/";
}
poses_file += "poses/" + sequence + ".txt";
std::cout << "ground truth filename: " << poses_file << std::endl;
if (rv::FileUtil::exists(poses_file)) {
std::vector<Eigen::Matrix4f> poses = KITTI::Odometry::loadPoses(poses_file);
for (uint32_t i = 0; i < poses.size(); ++i) {
Eigen::Matrix4f pose = poses[i];
groundtruth_.push_back(Tr_inv * pose * Tr);
}
std::cout << groundtruth_.size() << " poses read." << std::endl;
} else {
std::cout << "not found." << std::endl;
}
} else {
std::string sequence = parts[parts.size() - 3].toStdString();
// find ground truth poses.
std::string poses_file;
for (int32_t i = 0; i < parts.size() - 4; ++i) {
poses_file += parts[i].toStdString();
poses_file += "/";
}
poses_file += "poses/" + sequence + ".txt";
std::cout << "ground truth filename: " << poses_file << std::endl;
if (rv::FileUtil::exists(poses_file)) {
std::vector<Eigen::Matrix4f> poses = KITTI::Odometry::loadPoses(poses_file);
for (uint32_t i = 0; i < poses.size(); ++i) {
Eigen::Matrix4f pose = poses[i];
groundtruth_.push_back(pose);
}
std::cout << groundtruth_.size() << " poses read." << std::endl;
} else {
std::cout << "not found." << std::endl;
}
}
// load the car model.
// load the car model and set its pose
initilizeKITTIrobot();
ui_.wCanvas->setGroundtruth(groundtruth_);
ui_.sldTimeline->setEnabled(reader_->isSeekable());
ui_.sldTimeline->setMaximum(reader_->count() - 1);
if (ui_.sldTimeline->value() == 0) setScan(0); // triggers update of scan.
ui_.sldTimeline->setValue(0);
}
else {
std::cerr << "SuMa++ will fail because of wrong format of input LiDAR scans" << std::endl;
}
}
if (reader_ != nullptr) {
ui_.wCanvas->setScanCount(reader_->count());
}
}
2. Playing back the point-cloud data
VisualizerWindow.cpp 46
connect(ui_.btnPlay, SIGNAL(toggled(bool)), this, SLOT(play(bool)));
void VisualizerWindow::play(bool start) {
if (start) {
timer_.start();
ui_.btnPlay->setChecked(true);
} else {
timer_.stop();
ui_.btnPlay->setChecked(false);
}
updatePlaybackControls();
}
void VisualizerWindow::updatePlaybackControls() {
if (reader_ != nullptr) {
// enable the play button
ui_.btnPlay->setEnabled(true);
// enable jumping back to the start
ui_.btnReset->setEnabled(true);
// maybe better to define separate up/down icons.
if (ui_.btnPlay->isChecked())
ui_.btnPlay->setIcon(QIcon::fromTheme("media-playback-pause"));
else
ui_.btnPlay->setIcon(QIcon::fromTheme("media-playback-start"));
if (currentScanIdx_ < reader_->count() - 1)
// the current frame is not the last one
ui_.btnNext->setEnabled(true);
else
ui_.btnNext->setEnabled(false);
} else {
ui_.btnPlay->setEnabled(false);
ui_.btnNext->setEnabled(false);
ui_.btnReset->setEnabled(false);
}
}
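The QTimer started in play() is what drives automatic frame advancement. A hypothetical sketch of the wiring (the actual slot names in SuMa++ may differ): each timeout advances the timeline slider by one, and the slider's value change is what ultimately calls setScan().
connect(&timer_, &QTimer::timeout, this, [this]() {
  // hypothetical: advance one frame per tick; setValue() emits valueChanged,
  // which is connected to setScan() elsewhere in VisualizerWindow
  if (ui_.sldTimeline->value() < ui_.sldTimeline->maximum())
    ui_.sldTimeline->setValue(ui_.sldTimeline->value() + 1);
});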
So this playback code shows that the play controls in the GUI only drive the display; the main processing is triggered through setScan(), starting already when the point-cloud file is opened.
3. Walkthrough of the SuMa pipeline
3.1 Analysis of the core SuMa flow (also covering some SuMa++ additions)
VisualizerWindow.cpp 515
if (ui_.sldTimeline->value() == 0) setScan(0); // triggers update of scan.
ui_.sldTimeline->setValue(0);
void VisualizerWindow::setScan(int idx) {
if (reader_ == nullptr) return;
if (static_cast<uint32_t>(idx) >= reader_->count()) return; // ignore invalid calls.
if (!reader_->isSeekable() && ((uint32_t)idx < currentScanIdx_)) return;
uint32_t lastScanIdx = currentScanIdx_;
currentScanIdx_ = idx;
// In my VS Code, Ctrl+click here mistakenly jumps to SimulationReader.cpp; it should land in KITTIReader.cpp, since the reader object actually constructed is a KITTIReader
// seek() looks for the requested frame in the cache; if it is there, continue. Otherwise, as long as the cache has capacity, it buffers the frames between the current one and the requested one.
if (reader_->isSeekable()) reader_->seek(idx);
/* KITTIReader::read(uint32_t scan_idx, Laserscan& scan) calls doProjection(scan, num_points),
which converts the point cloud into the vertex map:
// get angles
float yaw = -std::atan2(y, x);
float pitch = std::asin(z / range);
// get projections in image coords
float proj_x = 0.5 * (yaw / M_PI + 1.0); // in [0.0, 1.0]
float proj_y = 1.0 - (pitch + std::abs(fov_down)) / fov; // in [0.0, 1.0]
// scale to image size using angular resolution
proj_x *= _img_w; // in [0.0, W]
proj_y *= _img_h; // in [0.0, H]
*/
/* Building the 2D range image:
for (uint32_t i = 0; i < inputs.size(); ++i) {
range_image[int(sorted_proj_ys[i] * _img_w + sorted_proj_xs[i])] = inputs[i];
}
...
...
// Back-project the processed range image (which by now stores the inference results) into semantic_scan; it is kept as the semantic labels for later use
semantic_scan.push_back(range_image[proj_ys[i] * _img_w + proj_xs[i]]);
...
// From the resulting semantic array, decide each point's class: among the 20 classes take the one with the maximum score and store the corresponding label
for (uint32_t i = 0; i < num_points; ++i) {
labels[i] = 0;
labels_prob[i] = 0;
for (uint32_t j = 0; j < 20; ++j)
{
if (labels_prob[i] <= color_mask[i*20+j])
{
labels[i] = label_map_[j];
labels_prob[i] = color_mask[i*20+j];
}
}
}
*/
reader_->read(currentLaserscan_);
if (lastScanIdx > currentScanIdx_) {
acc_->clear();
}
if (parameterUpdateNeeded_) {
fusion_->setParameters(params_);
// If the parameters need updating, re-apply them all and refresh the thresholds of the loop-closure-related time series
// update timeseries thresholds.
// GraphWidget::Timeseries::Ptr ts = ui_.wGraph->getTimeseries("loop_relative_error_inlier");
// ts->removeAllThresholds();
// ts->addThreshold(params_["loop-residual-thresh"]);
GraphWidget::Timeseries::Ptr ts = ui_.wGraph->getTimeseries("loop_relative_error_all");
ts->removeAllThresholds();
ts->addThreshold(params_["loop-residual-threshold"]);
ts = ui_.wGraph->getTimeseries("loop_outlier_ratio");
ts->removeAllThresholds();
ts->addThreshold(params_["loop-outlier-threshold"]);
ts = ui_.wGraph->getTimeseries("loop_valid_ratio");
ts->removeAllThresholds();
ts->addThreshold(params_["loop-valid-threshold"]);
parameterUpdateNeeded_ = false;
}
// for (rv::ParameterList::const_iterator pit = params_.begin(); pit != params_.end(); ++pit) {
// std::cout << "pit: " << *pit << std::endl;
// }
if (fusion_ != nullptr) {
if (precomputed_[currentScanIdx_]) {
// this frame has been processed before
fusion_->setCurrentPose(oldPoses_[currentScanIdx_]);
fusion_->initialize(currentLaserscan_);
fusion_->preprocess();
fusion_->getCurrentFrame()->pose = oldPoses_[currentScanIdx_];
} else {
// this frame is processed for the first time
// record the timestamp
int32_t timestamp = fusion_->timestamp();
// dissected in more detail later in this post
fusion_->processScan(currentLaserscan_);
oldPoses_[currentScanIdx_] = fusion_->getCurrentPose().cast<float>();
precomputed_[currentScanIdx_] = true;
if (currentScanIdx_ % history_stride_ == 0) {
std::vector<rv::Point3f> pts = currentLaserscan_.points();
if (currentLaserscan_.hasRemission()) {
for (uint32_t i = 0; i < pts.size(); ++i) {
pts[i].vec[3] = currentLaserscan_.remission(i);
}
}
acc_->insert(timestamp, fusion_->getCurrentPose().cast<float>(), pts);
}
SurfelMapping::Stats stats = fusion_->getStatistics();
statistics.push_back(stats);
ui_.wGraph->getTimeseries("runtime")->insert(timestamp, stats["complete-time"]);
ui_.wGraph->getTimeseries("loop_outlier_ratio")->insert(timestamp, stats["loop_outlier_ratio"]);
ui_.wGraph->getTimeseries("loop_relative_error_all")->insert(timestamp, stats["loop_relative_error_all"]);
// ui_.wGraph->getTimeseries("num_iterations")->insert(timestamp, stats["num_iterations"]);
ui_.wGraph->getTimeseries("loop_valid_ratio")->insert(timestamp, stats["valid_ratio"]);
ui_.wGraph->getTimeseries("loop_outlier_ratio")->insert(timestamp, stats["outlier_ratio"]);
ui_.wGraph->getTimeseries("increment_difference")->insert(timestamp, stats["increment_difference"]);
// ui_.wGraph->getTimeseries("icp_percentage")->insert(timestamp, stats["icp_percentage"]);
if (fusion_->useLoopClosureCandidate()) ui_.wGraph->insertMarker(timestamp, Qt::green);
ui_.wCanvas->setResidualMap(fusion_->getCurrentFrame()->residual_map);
}
ui_.wCanvas->setCurrentFrame(currentScanIdx_, *fusion_->getCurrentFrame());
ui_.wCanvas->setOldSurfelFrame(*fusion_->getOldSurfelMap());
ui_.wCanvas->setNewSurfelFrame(*fusion_->getNewSurfelMap());
ui_.wCanvas->setModelFrame(*fusion_->getCurrentModelFrame());
std::vector<Eigen::Matrix4d> optimizedPoses = fusion_->getOptimizedPoses();
ui_.wCanvas->setOptimizedPoses(optimizedPoses);
ui_.wCanvas->setLoopClosurePose(fusion_->foundLoopClosureCandidate(), fusion_->useLoopClosureCandidate(),
fusion_->getLoopClosurePoses(), fusion_->getNewMapPose(), fusion_->getOldMapPose());
// update poses in accumulator.
std::vector<int32_t>& timestamps = acc_->getTimestamps();
std::vector<Eigen::Matrix4f>& acc_poses = acc_->getPoses();
for (uint32_t i = 0; i < timestamps.size(); ++i) {
if (timestamps[i] > -1) {
acc_poses[i] = optimizedPoses[timestamps[i]].cast<float>();
}
}
ui_.wGraph->setTimestamp(currentScanIdx_);
}
// triggers redraw:
ui_.wCanvas->setLaserscan(currentLaserscan_);
updatePlaybackControls();
ui_.txtScanNumber->setText(QString::number(currentScanIdx_));
ui_.wCanvas->setOdomPoses(fusion_->getIntermediateOdometryPoses());
if (recording_) {
std::string out_filename = outputDirectory_;
out_filename += QString("/%1.png").arg((int)currentScanIdx_, 5, 10, (QChar)'0').toStdString();
ui_.wCanvas->grabFrameBuffer().save(QString::fromStdString(out_filename));
std::cout << "Writing to " << out_filename << std::endl;
}
#ifdef QUERY_MEMORY_NV
GLint totalMemory = 0, availableMemory = 0;
glGetIntegerv(GL_GPU_MEM_INFO_TOTAL_AVAILABLE_MEM_NVX, &totalMemory);
glGetIntegerv(GL_GPU_MEM_INFO_CURRENT_AVAILABLE_MEM_NVX, &availableMemory);
CheckGlError();
ui_.pbGpuMemory->setValue(100 * float(availableMemory) / float(totalMemory));
#endif
}
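For reference, the point-to-range-image projection quoted in the comments above can be written compactly. With $r = \|p\|$, vertical field of view $f = |f_{\text{up}}| + |f_{\text{down}}|$, and image size $W \times H$, a point $p = (x, y, z)$ lands at pixel

$$u = \frac{W}{2}\left(1 - \frac{\operatorname{atan2}(y, x)}{\pi}\right) \in [0, W], \qquad v = H\left(1 - \frac{\arcsin(z/r) + |f_{\text{down}}|}{f}\right) \in [0, H]$$

which is exactly the proj_x / proj_y computation in doProjection().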
void SurfelMapping::processScan(const rv::Laserscan& scan) {
// timing (tic/toc)
Stopwatch::tic();
// check if optimization ready, copy poses, reinitialize loop closure count.
// Fold in the result of the previous loop-closure optimization: update the stored poses and subtract the already-handled loops from the loop count
/* void SurfelMapping::integrateLoopClosures(){
for (uint32_t i = 0; i < poses_opt.size(); ++i) {
// the optimized pose-graph poses are pushed straight into the new pose array
casted_poses.push_back(poses_opt[i].cast<float>());
posegraph_->setInitial(i, poses_opt[i]);
}
loopCount_ -= beforeLoopCount_;
// the pose correction produced by the new loop closures
Eigen::Matrix4d difference = poses_opt[beforeID_] * beforeOptimizationPose_.inverse();
for (uint32_t i = poses_opt.size(); i < poses_before.size(); ++i) {
// update the previously stored poses
poses_before[i] = difference * poses_before[i];
casted_poses.push_back(poses_before[i].cast<float>());
posegraph_->setInitial(i, poses_before[i]);
}
...
}
*/
if (makeLoopClosures_ && performMapping_) integrateLoopClosures();
Stopwatch::tic();
// Initialize the LiDAR data: represent it as raw xyz (why not use a point-cloud library?) and copy it into current_pts_
initialize(scan);
statistics_["initialize-time"] = Stopwatch::toc();
Stopwatch::tic();
// Build the vertex map and normal map from the processed point cloud with OpenGL
// SuMa++ adds a flood-fill step to remove wrong labels: it takes the semantic mask and the vertex map as input and outputs a corrected semantic mask
preprocess();
statistics_["preprocessing-time"] = Stopwatch::toc();
// double icpTime = 0.0;
if (timestamp_ > 0) {
Stopwatch::tic();
// Update the current frame's pose and the pose graph, push the pose-change distance onto the trajectory-distance stack, and initialize with the last pose increment.
// Frame-to-model ICP minimization; this is the odometry part
updatePose();
statistics_["icp-time"] = Stopwatch::toc();
Stopwatch::tic();
/*
//Verifying and adding constraints.
// render the inactive (old) map
map_->render_inactive(pose_old, getConfidenceThreshold());
// adjust increment.
// Adjust the increment: minimize against the old map frame; if that brings the match closer, the current frame is used as the loop-closure candidate
objective_->setData(currentFrame_, map_->oldMapFrame());
gn_->minimize(*objective_, lastIncrement_);
// Compose a virtual frame from the candidate and the current frame; this is the 'composed' map here
map_->render_composed(pose_old, currentPose_new_.cast<float>(), getConfidenceThreshold());
// Run loop-closure verification against this composed frame; if it meets the loop criteria, keep it as a candidate
// Once there are three or more candidates, compute the inter-frame residual for each one from three initializations and keep the one with the smallest change
gn_->minimize(*objective_, initializations[i]);
// If the smallest residual passes the test, the loop is stored in unverifiedLoopClosures_ as a tentative loop closure
*/
if (makeLoopClosures_ && performMapping_) checkLoopClosure();
statistics_["loop-time"] = Stopwatch::toc();
}
Stopwatch::tic();
// Create surfels, update the associated surfels in the existing map, and update the coordinates of the previously active submap
if (performMapping_) updateMap();
statistics_["mapping-time"] = Stopwatch::toc();
float completeTime = Stopwatch::toc();
statistics_["complete-time"] = completeTime;
statistics_["icp_percentage"] = statistics_["opt-time"] / completeTime;
timestamp_ += 1;
}
3.2 The preprocess step
void Preprocessing::process(glow::GlBuffer<rv::Point3f>& points, Frame& frame, glow::GlBuffer<float>& labels, glow::GlBuffer<float>& probs, uint32_t timestamp_) {
// OpenGL calls all return void, so failures cannot be detected from return values. To catch problems as early as possible, a wrapper is needed that checks whether a call failed.
CheckGlError();
// hand the given data to the buffers
// the point-cloud data goes into the current frame
frame.points.assign(points);
// the label indices, i.e. each point's class id
frame.labels.assign(labels); // for semantic map
// the probability of the most likely class
frame.probs.assign(probs);
GLboolean depthTest;
GLint ov[4], depthFunc, old_fbo;
GLfloat cc[4];
GLfloat psize;
/* glGetIntegerv (and the similar calls below) stores the state named by the first parameter into the memory pointed to by the second parameter.
void WINAPI glViewport(
GLint x, // (x, y): lower-left corner of the viewport
GLint y,
GLsizei width, // viewport width
GLsizei height // viewport height
);
*/
glGetIntegerv(GL_VIEWPORT, ov);
// GL_DEPTH_TEST: whether depth testing is enabled
glGetBooleanv(GL_DEPTH_TEST, &depthTest);
// GL_DEPTH_FUNC: the symbolic constant of the depth comparison function
glGetIntegerv(GL_DEPTH_FUNC, &depthFunc);
// GL_COLOR_CLEAR_VALUE: the red, green, blue, and alpha values used to clear the color buffer
glGetFloatv(GL_COLOR_CLEAR_VALUE, cc);
// GL_POINT_SIZE: the point size set by glPointSize
glGetFloatv(GL_POINT_SIZE, &psize);
// GL_DRAW_FRAMEBUFFER_BINDING: the currently bound draw framebuffer
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_fbo);
glPointSize(1.0f);
// Bind the buffers as vertex attributes; this establishes the CPU-GPU link that uploads the CPU-side data to the GPU
vao_points_.setVertexAttribute(0, points, 4, AttributeType::FLOAT, false, 4 * sizeof(float), nullptr);
vao_points_.enableVertexAttribute(0);
vao_points_.setVertexAttribute(1, labels, 1, AttributeType::FLOAT, false, 1 * sizeof(float), (void*)(4* sizeof(float)));
vao_points_.enableVertexAttribute(1);
vao_points_.setVertexAttribute(2, probs, 1, AttributeType::FLOAT, false, 1 * sizeof(float), (void*)(5* sizeof(float)));
vao_points_.enableVertexAttribute(2);
// 1. pass: generate raw vertex map:
if (avgVertexmap_)
// attach / glFramebufferTexture2D: a framebuffer does not itself store color or depth data; that lives in textures or renderbuffers, which are attached to the framebuffer.
semanticbuffer_.attach(FramebufferAttachment::COLOR0, temp_vertices_);
else
semanticbuffer_.attach(FramebufferAttachment::COLOR0, frame.vertex_map);
assert(semanticbuffer_.valid());
// the semantic map is bound as a second color attachment
semanticbuffer_.attach(FramebufferAttachment::COLOR1, frame.semantic_map);
semanticbuffer_.bind();
glDepthFunc(GL_LESS);
if (avgVertexmap_) {
// average vertex map.
glDisable(GL_DEPTH_TEST);
glEnable(GL_BLEND);
/*
Let the source color components (red, green, blue, alpha) be (Rs, Gs, Bs, As),
the destination components (Rd, Gd, Bd, Ad),
the source factor (Sr, Sg, Sb, Sa),
and the destination factor (Dr, Dg, Db, Da).
The blended color is then (Rs*Sr + Rd*Dr, Gs*Sg + Gd*Dg, Bs*Sb + Bd*Db, As*Sa + Ad*Da)
*/
glBlendFunc(GL_ONE, GL_ONE); // computing the sum of values
} else {
// z-buffered vertex map
glEnable(GL_DEPTH_TEST);
}
// glClearColor sets the color used when the color buffer is cleared; a full clear pairs glClearColor(COLOR) with glClear(GL_COLOR_BUFFER_BIT)
glClearColor(0, 0, 0, 0);
// glViewport(GLint x, GLint y, GLsizei width, GLsizei height);
// x, y: lower-left corner of the viewport, in pixels.
// width, height: size of the viewport rectangle, redrawn as the window changes.
glViewport(0, 0, width_, height_);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // reset depth/vertexmap
if (timestamp_ < 10)
depth_program_.setUniform(GlUniform<bool>("isfirst", true));
else
depth_program_.setUniform(GlUniform<bool>("isfirst", false));
vao_points_.bind();
depth_program_.bind();
// issue the draw call: render point primitives from the array data
glDrawArrays(GL_POINTS, 0, points.size());
depth_program_.release();
vao_points_.release();
semanticbuffer_.release();
glFinish();
if (avgVertexmap_) {
glDisable(GL_BLEND);
framebuffer_.attach(FramebufferAttachment::COLOR0, frame.vertex_map);
framebuffer_.bind();
vao_no_points_.bind();
avg_program_.bind();
glActiveTexture(GL_TEXTURE0);
temp_vertices_.bind();
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glDrawArrays(GL_POINTS, 0, 1);
temp_vertices_.release();
vao_no_points_.release();
avg_program_.release();
framebuffer_.release();
glEnable(GL_DEPTH_TEST);
}
// step 1.5: do bilateral filtering of vertex map.
if (filterVertexmap_) {
framebuffer_.attach(FramebufferAttachment::COLOR0, temp_vertices_);
glActiveTexture(GL_TEXTURE0);
frame.vertex_map.bind();
framebuffer_.bind();
vao_no_points_.bind();
bilateral_program_.bind();
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glDrawArrays(GL_POINTS, 0, 1);
vao_no_points_.release();
bilateral_program_.release();
framebuffer_.release();
glActiveTexture(GL_TEXTURE0);
frame.vertex_map.release();
if (useFilteredVertexmap_) frame.vertex_map.copy(temp_vertices_);
}
// 2. pass: generate normal map:
glDisable(GL_DEPTH_TEST);
glow::GlTextureRectangle erode_semantic_map(width_, height_, TextureFormat::RGBA_FLOAT);
semanticbuffer_.attach(FramebufferAttachment::COLOR0, frame.normal_map);
semanticbuffer_.attach(FramebufferAttachment::COLOR1, erode_semantic_map);
semanticbuffer_.bind();
glActiveTexture(GL_TEXTURE0);
if (filterVertexmap_)
temp_vertices_.bind();
else
frame.vertex_map.bind();
glActiveTexture(GL_TEXTURE1);
frame.semantic_map.bind();
sampler_.bind(0);
sampler_.bind(1);
vao_no_points_.bind();
normal_program_.bind();
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // reset depth/normalmap
glDrawArrays(GL_POINTS, 0, 1);
normal_program_.release();
vao_no_points_.release();
semanticbuffer_.release();
glActiveTexture(GL_TEXTURE0);
if (filterVertexmap_)
temp_vertices_.release();
else
frame.vertex_map.release();
glActiveTexture(GL_TEXTURE1);
frame.semantic_map.release();
sampler_.release(0);
sampler_.release(1);
// 3. refine: floodfill semantic map:
glDisable(GL_DEPTH_TEST);
glow::GlTextureRectangle refine_semantic_map(width_, height_, TextureFormat::RGBA_FLOAT);
framebuffer_.attach(FramebufferAttachment::COLOR0, refine_semantic_map);
assert(framebuffer_.valid());
framebuffer_.bind();
glActiveTexture(GL_TEXTURE0);
if (filterVertexmap_)
temp_vertices_.bind();
else
frame.vertex_map.bind();
glActiveTexture(GL_TEXTURE1);
erode_semantic_map.bind();
sampler_.bind(0);
sampler_.bind(1);
vao_no_points_.bind();
floodfill_program_.bind();
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // reset depth/normalmap
glDrawArrays(GL_POINTS, 0, 1);
floodfill_program_.release();
vao_no_points_.release();
framebuffer_.release();
glActiveTexture(GL_TEXTURE0);
if (filterVertexmap_)
temp_vertices_.release();
else
frame.vertex_map.release();
glActiveTexture(GL_TEXTURE1);
erode_semantic_map.release();
sampler_.release(0);
sampler_.release(1);
glFinish();
frame.valid = true;
frame.semantic_map.copy(refine_semantic_map);
// restore settings.
glViewport(ov[0], ov[1], ov[2], ov[3]);
if (depthTest) glEnable(GL_DEPTH_TEST);
glDepthFunc(depthFunc);
glClearColor(cc[0], cc[1], cc[2], cc[3]);
glPointSize(psize);
glBindFramebuffer(GL_FRAMEBUFFER, old_fbo);
CheckGlError();
}
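As a side note, the save-at-entry / restore-at-exit dance around the GL state (viewport, depth test, depth function, clear color, point size, framebuffer binding) could be wrapped in a small RAII helper. A hypothetical sketch, not part of SuMa++, assuming the usual OpenGL headers are in scope:
class GlStateGuard {
 public:
  GlStateGuard() {
    // snapshot the state that Preprocessing::process() touches
    glGetIntegerv(GL_VIEWPORT, viewport_);
    glGetBooleanv(GL_DEPTH_TEST, &depthTest_);
    glGetIntegerv(GL_DEPTH_FUNC, &depthFunc_);
    glGetFloatv(GL_COLOR_CLEAR_VALUE, clearColor_);
    glGetFloatv(GL_POINT_SIZE, &pointSize_);
    glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &fbo_);
  }
  ~GlStateGuard() {
    // restore everything on scope exit, even on early return
    glViewport(viewport_[0], viewport_[1], viewport_[2], viewport_[3]);
    if (depthTest_) glEnable(GL_DEPTH_TEST); else glDisable(GL_DEPTH_TEST);
    glDepthFunc(depthFunc_);
    glClearColor(clearColor_[0], clearColor_[1], clearColor_[2], clearColor_[3]);
    glPointSize(pointSize_);
    glBindFramebuffer(GL_FRAMEBUFFER, fbo_);
  }
 private:
  GLint viewport_[4], depthFunc_ = GL_LESS, fbo_ = 0;
  GLboolean depthTest_ = GL_FALSE;
  GLfloat clearColor_[4], pointSize_ = 1.0f;
};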
3.3 Parameters set up in the Preprocessing constructor, which wires in the core shader logic
Preprocessing::Preprocessing(const rv::ParameterList& params)
: width_(params["data_width"]),
height_(params["data_height"]),
framebuffer_(width_, height_, FramebufferTarget::BOTH),
semanticbuffer_(width_, height_, FramebufferTarget::BOTH),
temp_vertices_(width_, height_, TextureFormat::RGBA_FLOAT){
depth_program_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/gen_vertexmap.vert"));
depth_program_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/gen_vertexmap.frag"));
depth_program_.link();
avg_program_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/empty.vert"));
avg_program_.attach(GlShader::fromCache(ShaderType::GEOMETRY_SHADER, "shader/quad.geom"));
avg_program_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/avg_vertexmap.frag"));
avg_program_.link();
bilateral_program_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/empty.vert"));
bilateral_program_.attach(GlShader::fromCache(ShaderType::GEOMETRY_SHADER, "shader/quad.geom"));
bilateral_program_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/bilateral_filter.frag"));
bilateral_program_.link();
normal_program_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/empty.vert"));
normal_program_.attach(GlShader::fromCache(ShaderType::GEOMETRY_SHADER, "shader/quad.geom"));
normal_program_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/gen_normalmap.frag"));
normal_program_.link();
floodfill_program_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/empty.vert"));
floodfill_program_.attach(GlShader::fromCache(ShaderType::GEOMETRY_SHADER, "shader/quad.geom"));
floodfill_program_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/floodfill.frag"));
floodfill_program_.link();
...
}
3.3.1 gen_vertexmap.vert: building the vertex_map
vert_label = label;
vert_label_prob = prob;
// this part generates the vertex map
float fov = abs(fov_up) + abs(fov_down);
float depth = length(position.xyz);
float yaw = atan(position.y, position.x);
float pitch = -asin(position.z / depth); // angle = acos((0,0,1) * p/||p||) - pi/2 = pi/2 - asin(x) + pi/2
float x = (-yaw * inv_pi); // in [-1, 1]
float y = (1.0 - 2.0 * (degrees(pitch) + fov_up) / fov); // in [-1, 1]
float z = 2.0f * ((depth - min_depth) / (max_depth - min_depth)) - 1.0f; // in [-1, 1]
// force that each point lies exactly inside the texel enabling reproducible results.
x = 2.0f * ((floor(0.5f * (x + 1.0f) * width ) + 0.5f ) / width) - 1.0;
y = 2.0f * ((floor(0.5f * (y + 1.0f) * height ) + 0.5f ) / height) - 1.0;
gl_Position = vec4(x, y, z, 1.0);
vertex_coord = vec4(position.xyz, 1.0);
// remove all movable objects during initialization period
// movable objects are already invalidated while the vertex map is generated
if(isfirst){
if(vert_label == car.w || vert_label == bicycle.w ||
vert_label == bus.w || vert_label == motorcycle.w||
vert_label == truck.w|| vert_label == other_vehicle.w||
vert_label == person.w||
vert_label == bicyclist.w || vert_label == motorcyclist.w)
vertex_coord = invalid;
}
3.3.2 gen_normalmap.frag: building the normal_map
float width = textureSize(vertex_map).x;
vec2 pos = texCoords * textureSize(vertex_map);
normal = invalid;
eroded_semantic_map = invalid; // for invalid points
if(texture(vertex_map, pos).w > 0.0f)
{
normal.w = 1.0f;
// position of the center point
vec4 p = texture(vertex_map, pos);
// positions of the four neighboring points
vec4 u = texture(vertex_map, vec2(wrap(pos.x + 1, width), pos.y));
vec4 v = texture(vertex_map, vec2(pos.x, pos.y + 1));
vec4 s = texture(vertex_map, vec2(wrap(pos.x - 1, width), pos.y));
vec4 t = texture(vertex_map, vec2(pos.x, pos.y - 1));
// differences to the neighbors
u.xyz = normalize(u.xyz - p.xyz);
v.xyz = normalize(v.xyz - p.xyz);
s.xyz = normalize(p.xyz - s.xyz);
t.xyz = normalize(p.xyz - t.xyz);
if(u.w < 1.0f && v.w < 1.0f) normal.w = 0;
if(s.w < 1.0f && t.w < 1.0f) normal.w = 0;
// check the validity of the neighbor positions
if(!valid(u) || !valid(v)) normal.w = 0;
// floodfill erosion
int kernel_size = 2;
// the flood-fill erosion assumes the current pos is valid, i.e. carries a label
eroded_semantic_map = texture(semantic_map, pos);
for(int offset = 1; offset < kernel_size; offset++)
{
float p_label = texture(semantic_map, pos).x;
float u_label = texture(semantic_map, vec2(wrap(pos.x + offset, width), pos.y)).x;
float v_label = texture(semantic_map, vec2(pos.x, pos.y + offset)).x;
float s_label = texture(semantic_map, vec2(wrap(pos.x - offset, width), pos.y)).x;
float t_label = texture(semantic_map, vec2(pos.x, pos.y - offset)).x;
// If the current label differs from a neighbor's label and that neighbor is labeled, eroded_semantic_map is set to invalid; i.e. a pixel is kept as valid only if it agrees with all of its labeled neighbors
// This matches the paper: a point in the semantic mask that has neighbors with a different semantic label is treated as a boundary point and removed; after a full pass, the boundary regions between objects have been eroded away. This is the first half of Algorithm 1.
if((p_label != u_label && u_label != 0.0) ||
(p_label != v_label && v_label != 0.0) ||
(p_label != s_label && s_label != 0.0) ||
(p_label != t_label && t_label != 0.0))
eroded_semantic_map = invalid;
}
// TODO: check if distances in x/y-direction are similar.
//if(abs(length(u) - length(s)) / max(length(u), length(s)) > 0.5) normal.w = 0;
//if(abs(length(v) - length(t)) / max(length(t), length(v)) > 0.5) normal.w = 0;
if(normal.w > 0.0f)
{
vec3 w = cross(u.xyz, v.xyz);
float len = length(w);
normal = vec4(w / len, 1.0);
normal.w = int(len > 0.0000001);
}
}
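Written out, the erosion corresponding to the first half of Algorithm 1 keeps a label only where the neighborhood is semantically uniform. For a pixel $p$ with label $\ell_p$ and kernel neighborhood $N(p)$:

$$M_{\text{eroded}}(p) = \begin{cases} \ell_p & \text{if } \forall q \in N(p):\ \ell_q = \ell_p \ \text{or}\ \ell_q = 0 \\ \text{invalid} & \text{otherwise} \end{cases}$$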
3.3.3 floodfill.frag
float width = textureSize(semantic_map).x;
vec2 pos = texCoords * textureSize(semantic_map);
refined_semantic_map = texture(semantic_map, pos);
int kernel_size = 3;
// floodfill
{
// examine neighbors at offsets 1 and 2 (kernel_size = 3)
for(int offset = 1; offset < kernel_size; offset++)
{
vec4 p = texture(vertex_map, pos);
vec4 u = texture(vertex_map, vec2(wrap(pos.x + offset, width), pos.y));
vec4 v = texture(vertex_map, vec2(pos.x, pos.y + offset));
vec4 s = texture(vertex_map, vec2(wrap(pos.x - offset, width), pos.y));
vec4 t = texture(vertex_map, vec2(pos.x, pos.y - offset));
vec4 p_label = texture(semantic_map, pos);
vec4 u_label = texture(semantic_map, vec2(wrap(pos.x + offset, width), pos.y));
vec4 v_label = texture(semantic_map, vec2(pos.x, pos.y + offset));
vec4 s_label = texture(semantic_map, vec2(wrap(pos.x - offset, width), pos.y));
vec4 t_label = texture(semantic_map, vec2(pos.x, pos.y - offset));
// the threshold is set to 0.007
float threshold = 0.007;
// use reciprocal decay
// Walk outward from the nearest ring: if the center label was removed and a neighbor carries a label, and the two ranges differ by less than threshold times the center range, copy the neighbor's value into refined_semantic_map and break
// This matches the paper: the blanked-out positions are re-assigned labels on a nearest-neighbor basis. It is the second half of Algorithm 1.
if(p_label.x == 0.0 && u_label.x != 0 && abs(length(p.xyz) - length(u.xyz)) < threshold * length(p.xyz))
{
refined_semantic_map = vec4(u_label.xyz, u_label.w / (offset + 1));
break;
}
else if(p_label.x == 0.0 && v_label.x != 0 && abs(length(p.xyz) - length(v.xyz)) < threshold * length(p.xyz))
{
refined_semantic_map = vec4(v_label.xyz, v_label.w / (offset + 1));
break;
}
else if(p_label.x == 0.0 && s_label.x != 0 && abs(length(p.xyz) - length(s.xyz)) < threshold * length(p.xyz))
{
refined_semantic_map = vec4(s_label.xyz, s_label.w / (offset + 1));
break;
}
else if(p_label.x == 0.0 && t_label.x != 0 && abs(length(p.xyz) - length(t.xyz)) < threshold * length(p.xyz))
{
refined_semantic_map = vec4(t_label.xyz, t_label.w / (offset + 1));
break;
}
}
}
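Written out, the fill step (second half of Algorithm 1): an unlabeled pixel $p$ takes the label of the first labeled neighbor $q$ (searched from offset 1 outward) whose range is consistent with its own, with the stored probability decayed by the offset:

$$\ell_p \leftarrow \ell_q,\quad w_p \leftarrow \frac{w_q}{\text{offset}+1} \qquad \text{if } \ell_p = 0,\ \ell_q \neq 0,\ \big|\,\|v_p\| - \|v_q\|\,\big| < \theta \,\|v_p\|$$

with $\theta = 0.007$ as in the shader.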
3.4 Key shaders set up in the SurfelMap constructor
SurfelMap::SurfelMap(const ParameterList& params)
: dataWidth_(params["data_width"]),
dataHeight_(params["data_height"]),
modelWidth_(params["model_width"]),
modelHeight_(params["model_height"]),
indexMapFramebuffer_(dataWidth_, dataHeight_),
indexMap_(dataWidth_, dataHeight_, TextureFormat::R_FLOAT),
indexVertexMap_(dataWidth_, dataHeight_, TextureFormat::RGBA_FLOAT),
indexNormalMap_(dataWidth_, dataHeight_, TextureFormat::RGBA_FLOAT),
updateFramebuffer_(dataWidth_, dataHeight_),
measurementIntegrated_(dataWidth_, dataHeight_, TextureFormat::RGBA_FLOAT),
renderFramebuffer_(modelWidth_, modelHeight_),
radiusConfidenceFramebuffer_(dataWidth_, dataHeight_),
centerizedVertexMap_(dataWidth_, dataHeight_, TextureFormat::RGBA_FLOAT),
filteredNormalMap_(dataWidth_, dataHeight_, TextureFormat::RGBA_FLOAT),
radiusConfidenceMap_(dataWidth_, dataHeight_, TextureFormat::RGBA_FLOAT),
poseTexture_(poseBuffer_, TextureFormat::RGBA_FLOAT),
color_map_1dtex_(260, TextureFormat::RGB_FLOAT)
{
glow::_CheckGlError(__FILE__, __LINE__);
// Drawing surfels: how color, position, etc. are determined. A surfel here is essentially a disk-shaped point: a point of the original cloud augmented with a radius.
// Each surfel is represented by a position vector, a normal vector, and a radius:
// vec4 p = gl_in[0].gl_Position; // position
// vec4 n = gs_in[0].normal; // normal
// radius = gs_in[0].radius; // radius
draw_surfels_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/draw_surfels.vert"));
draw_surfels_.attach(GlShader::fromCache(ShaderType::GEOMETRY_SHADER, "shader/draw_surfels.geom"));
draw_surfels_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/draw_surfels.frag"));
draw_surfels_.link();
// Looks similar to the block above, except there is no radius; presumably it only draws plain points
draw_surfelPoints_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/draw_surfelPoints.vert"));
draw_surfelPoints_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/passthrough.frag"));
draw_surfelPoints_.link();
// Assigns the surfel attributes, e.g. sfl_position_radius, sfl_normal_confidence, etc.
initialize_program_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/gen_surfels.vert"));
initialize_program_.attach(GlShader::fromCache(ShaderType::GEOMETRY_SHADER, "shader/gen_surfels.geom"));
initialize_program_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/empty.frag"));
initialize_program_.attach(initialize_feedback_);
initialize_program_.link();
initialize_program_.setUniform(GlUniform<int32_t>("poseBuffer", 5));
// Determines and stores the surfel radius
radConf_program_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/init_radiusConf.vert"));
radConf_program_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/init_radiusConf.frag"));
radConf_program_.link();
// Builds the surfel index map
// its positions are likewise determined by the projection:
// gl_Position = vec4(2.0 * projectSpherical(p) - vec3(1.0), 1.0);
// index = gl_VertexID + 1; // initially all index pixels are 0.
indexMap_program_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/gen_indexmap.vert"));
indexMap_program_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/gen_indexmap.frag"));
indexMap_program_.link();
// Updates the surfel confidences as well as each surfel's vertex vector, normal vector, and radius
update_program_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/update_surfels.vert")); // for semantic map
update_program_.attach(GlShader::fromCache(ShaderType::GEOMETRY_SHADER, "shader/update_surfels.geom"));
update_program_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/update_surfels.frag"));
update_program_.attach(update_feedback_);
update_program_.link();
update_program_.setUniform(GlUniform<int32_t>("poseBuffer", 5));
// Copies existing surfels into another surfel buffer for output
copy_program_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/copy_surfels.vert"));
copy_program_.attach(GlShader::fromCache(ShaderType::GEOMETRY_SHADER, "shader/copy_surfels.geom"));
copy_program_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/empty.frag"));
copy_program_.attach(copy_feedback_);
copy_program_.link();
// Renders the surfels, similar to the drawing above
render_program_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/render_surfels.vert"));
render_program_.attach(GlShader::fromCache(ShaderType::GEOMETRY_SHADER, "shader/render_surfels.geom"));
render_program_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/render_surfels.frag"));
render_program_.link();
// Blends the new and old vertex/normal vectors; under certain conditions the old values replace the new ones
compose_program_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/empty.vert"));
compose_program_.attach(GlShader::fromCache(ShaderType::GEOMETRY_SHADER, "shader/quad.geom"));
compose_program_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/render_compose.frag"));
compose_program_.link();
...
extractProgram_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/extract_surfels.vert"));
extractProgram_.attach(GlShader::fromCache(ShaderType::GEOMETRY_SHADER, "shader/copy_surfels.geom"));
extractProgram_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/empty.frag"));
extractProgram_.attach(extractFeedback_);
extractProgram_.link();
...
drawSubmaps_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/draw_submaps.vert"));
drawSubmaps_.attach(GlShader::fromCache(ShaderType::GEOMETRY_SHADER, "shader/draw_submaps.geom"));
drawSubmaps_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/coloredvertices.frag"));
drawSubmaps_.link();
3.4.1 init_radiusConf
const vec2 halfpix = vec2(0.5f, 0.5f);
const float sqrt2 = 1.41421356237f;
const float pi = 3.14159265358979323846f;
const float inv_pi = 0.31830988618379067154f;
const float pi_2 = 1.57079632679;
out float valid;
out vec4 centerized_vertex;
out float radius;
out float confidence;
float get_radius(vec4 vertex, vec4 normal, float pixel_size)
{
float d = length(vertex.xyz);
// this corresponds to the surfel radius computation in the paper
return 1.41 * d * pixel_size / clamp(dot(normal.xyz, -vertex.xyz / d), 0.5, 1.0);
}
void main()
{
vec2 tex_dim = textureSize(vertex_map);
gl_Position = vec4(2.0 * img_coords / tex_dim - vec2(1.0), 0, 1);
valid = 0.0;
radius = 0.0;
confidence = 0.0;
vec4 vertex = texture(vertex_map, img_coords);
vec4 normal = texture(normal_map, img_coords);
vec3 view_dir = -vertex.xyz / length(vertex.xyz);
float angle = dot(normal.xyz, view_dir);
// the radius is kept only under the following conditions
if(vertex.w > 0.5 && normal.w > 0.5 && angle > angle_thresh)
{
valid = 1.0;
// this angle is taken as the complement
float angle = 1.0 - clamp(dot(normal.xyz, view_dir), 0, 1);
float confidence = 1.0f;
centerized_vertex = vertex;
radius = get_radius(vertex, normal, pixel_size);
// clamp the radius into [min_radius, max_radius]
radius = min(max(radius, min_radius), max_radius);
}
}
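get_radius() above is the surfel radius formula from the paper: with measurement $v$, normal $n$, per-pixel angular footprint $s$ (pixel_size), and depth $d = \|v\|$,

$$r = \frac{\sqrt{2}\, d\, s}{\operatorname{clamp}\!\big(n \cdot (-v/d),\ 0.5,\ 1.0\big)}$$

The clamp bounds the foreshortening term, so surfaces seen at grazing angles do not get arbitrarily large radii.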
3.4.2 update_surfels.vert
// this function projects a point into spherical (range-image) coordinates
vec3 projectSpherical(vec3 position)
{
float fov = abs(fov_up) + abs(fov_down);
float depth = length(position.xyz);
float yaw = atan(position.y, position.x);
float pitch = -asin(position.z / depth);
float x = 0.5 * ((-yaw * inv_pi) + 1.0); // in [0, 1]
float y = (1.0 - (degrees(pitch) + fov_up) / fov); // in [0, 1]
float z = (depth - min_depth) / (max_depth - min_depth); // in [0, 1]
// half-pixel coordinates.
x = (floor(x * width) + 0.5);
y = (floor(y * height) + 0.5);
return vec3(x, y, z);
}
vec4 centerize(vec2 img_coords, vec3 position, vec3 normal, vec2 dim)
{
float fov = abs(fov_up) + abs(fov_down);
float x05 = (floor(img_coords.x) + 0.5f) / dim.x;
float y05 = (floor(img_coords.y) + 0.5f) / dim.y;
float theta = radians((1.0f - y05) * fov - fov_up) + pi_2;
float phi = -(2.0 * x05 - 1.0f) * pi;
vec3 nu = vec3(sin(theta)*cos(phi), sin(theta)*sin(phi), cos(theta));
float r = dot(normal, position) / dot(normal, nu);
return vec4(r * nu, 1.0);
}
vec3 slerp(vec3 v0, vec3 v1, float weight)
{
float omega = acos(dot(normalize(v0), normalize(v1)));
// weight is actually (1.0 - t), therefore inverted.
float eta = 1.0 / sin(omega);
float w0 = eta * sin(weight * omega);
float w1 = eta * sin((1.-weight)* omega);
return w0 * v0 + w1 * v1;
}
float get_radius(vec4 vertex, vec4 normal, float pixel_size)
{
float d = length(vertex.xyz);
return 1.41 * d * pixel_size / clamp(dot(normal.xyz, -vertex.xyz / d), 0.5, 1.0);
}
mat4 get_pose(int timestamp)
{
int offset = 4 * timestamp;
return mat4(texelFetch(poseBuffer, offset), texelFetch(poseBuffer, offset + 1),
texelFetch(poseBuffer, offset + 2), texelFetch(poseBuffer, offset + 3));
}
void main()
{
const float upper_stability_bound = 20.0;
int surfel_age = timestamp - surfel_timestamp;
int creation_timestamp = int(surfel_color_weight_count.z);
// read column-wise surfel pose from texture buffer:
mat4 surfelPose = get_pose(creation_timestamp);
vec3 old_position = (surfelPose * vec4(surfel_position_radius.xyz, 1)).xyz;
vec3 old_normal = (surfelPose* vec4(surfel_normal_confidence.xyz, 0)).xyz;
float old_radius = surfel_position_radius.w;
float old_confidence = surfel_normal_confidence.w;
float old_weight = surfel_color_weight_count.y;
vs_out.valid = true;
if(old_confidence < confidence_threshold && use_stability) vs_out.valid = (surfel_age < unstable_age);
vs_out.position_radius = surfel_position_radius;
vs_out.normal_confidence = surfel_normal_confidence;
vs_out.timestamp = surfel_timestamp;
vs_out.color_weight_count = surfel_color_weight_count;
vs_out.color_weight_count.x = pack(vec3(0.3, 0.3, 0.3));
vs_out.semantic_map = surfel_semantic_map; // for semantic map
vec4 vertex = inv_pose * vec4(old_position, 1.0);
vec4 normal = normalize(inv_pose * vec4(old_normal, 0.0));
bool visible = (dot(normal.xyz, -vertex.xyz / length(vertex.xyz)) > 0.00);
vec3 img_coords = projectSpherical(vertex.xyz);
vec3 dim = vec3(width, height, 1);
gl_Position = vec4(-10.0);
bool valid = (texture(vertex_map, img_coords.xy).w > 0.5f) && (texture(normal_map, img_coords.xy).w > 0.5f);
bool inside = all(lessThan(img_coords, dim)) && !(all(lessThan(img_coords, vec3(0))));
// the confidence update gains a semantic-consistency term that accelerates the confidence decay
float penalty = 0.0;
float update_confidence = log_prior;
if(valid && inside && visible)
{
// detect and penalize outliers
float data_label = texture(semantic_map_in, img_coords.xy).x * 255.0;
float data_prob = texture(semantic_map_in, img_coords.xy).w;
float model_label = surfel_semantic_map.x * 255.0;
float model_prob = surfel_semantic_map.w;
// same position, but the label has changed
if(round(data_label) != round(model_label))
{
if( model_label == car.w || model_label == bicycle.w ||
model_label == bus.w || model_label == motorcycle.w||
model_label == truck.w|| model_label == other_vehicle.w||
model_label == person.w||
model_label == bicyclist.w || model_label == motorcyclist.w)
// if the stored label is a movable class, the penalty kicks in
penalty = 1.0;
}
mat4 surfelPose_inv = inverse(surfelPose);
// Check if surfel and measurement are compatible.
vec3 v = texture(vertex_map, img_coords.xy).xyz;
vec3 n = texture(normal_map, img_coords.xy).xyz;
vec4 v_global = pose * vec4(v, 1.0f);
// Note: We assume non-scaling transformation, det(pose) = 1.0. Therefore Rot(pose^T^-1) = Rot(pose).
vec4 n_global = normalize(pose * vec4(n, 0.0f));
float depth = length(v);
vec3 view_dir = -v/length(v);
float distance = abs(dot(old_normal.xyz, v_global.xyz - old_position.xyz)); // distance(v_global.xyz, old_position.xyz);
float angle = length(cross(n_global.xyz, old_normal.xyz));
float new_radius = texture(radiusConfidence_map, img_coords.xy).x;
float new_confidence = texture(radiusConfidence_map, img_coords.xy).y;
// measurement is compatible, update conf & timestamp, but integrate measurement only if new radius is smaller!
// If a surfel exists there, check the distance and angle deviations:
if((distance < distance_thresh) && (angle < angle_thresh))
{
gl_Position = vec4(2.0 * img_coords / dim - 1.0f, 1.0f); // measurement integrated: no need to generate surfel.
float confidence = old_confidence + new_confidence;
// always update confidence and timestamp:
vs_out.normal_confidence.w = confidence;
vs_out.timestamp = timestamp;
float avg_radius = min(new_radius, old_radius);
avg_radius = max(avg_radius, min_radius);
vs_out.position_radius.w = avg_radius;
vs_out.valid = true;
vs_out.color_weight_count.x = pack(vec3(0.0, 0.7, 0.0f)); // green
vs_out.color_weight_count.z = creation_timestamp;
float r = (depth - min_depth) / (max_depth - min_depth);
float a = angle;
float d = distance;
float p = p_stable;
// Surfel confidence: the paper assigns each surfel an odds ratio expressing how reliable it is, computed as follows:
if(confidence_mode == 1 || confidence_mode == 3) p *= exp(-a*a / (sigma_angle*sigma_angle));
if(confidence_mode == 2 || confidence_mode == 3) p *= exp(-d*d / (sigma_distance*sigma_distance));
p = clamp(p, p_unstable, 1.0);
// the log-odds function
update_confidence = log(p / (1.0 - p));
// If the checks pass, the measured point is considered to hit the same surfel as the map surfel; if the new measurement is more accurate (e.g. a smaller radius), integrate it
if((new_radius < old_radius && timestamp - creation_timestamp < active_timestamps) || update_always)
{
float w1 = 0.9;
float w2 = 0.1;
if(weighting_scheme > 0)
{
w1 = old_weight;
w2 = 1;
if(weighting_scheme == 2) w2 = dot(n, view_dir);
vs_out.color_weight_count.y = min(max_weight, w1 + w2);
float sum = w1 + w2;
w1 /= sum;
w2 /= sum;
}
// update the surfel's vertex and normal vectors
vec3 avg_position = w1 * old_position.xyz + w2 * v_global.xyz;
// vec3 avg_normal = normalize(w1 * old_normal.xyz + w2 * n_global.xyz);
vec3 avg_normal = slerp(old_normal.xyz, n_global.xyz, w1);
// update semantic probability
float avg_prob = 0;
if(round(data_label) != round(model_label))
avg_prob = w1 * model_prob + w2 * (1 - data_prob);
else
avg_prob = w1 * model_prob + w2 * data_prob;
vs_out.semantic_map.w = avg_prob;
// re-center the vertex such that it should cover the pixel again completely.
// vec3 v_s = (inv_pose * vec4(avg_position,1.0)).xyz;
// vec3 n_s = (inv_pose * vec4(avg_normal,0.0)).xyz;
// vec4 cpos = centerize(img_coords.xy, v_s, n_s, textureSize(vertex_map));
// avg_position = (pose * cpos).xyz;
// avg_position = centerize(img_coords.xy, avg_position, avg_normal, textureSize(vertex_map));
// normally averaging_scheme == 0
if(averaging_scheme == 1)
{
// move surfel along the normal towards measurment.
avg_position = old_position.xyz + w2 * distance * old_normal.xyz;
avg_normal = slerp(old_normal.xyz, n_global.xyz, w1);
}
avg_normal = normalize(avg_normal);
avg_position = (surfelPose_inv * vec4(avg_position.xyz, 1)).xyz;
avg_normal = (surfelPose_inv * vec4(avg_normal.xyz, 0)).xyz;
vs_out.position_radius = vec4(avg_position.xyz, avg_radius);
vs_out.normal_confidence = vec4(avg_normal.xyz, confidence);
vs_out.color_weight_count.x = pack(vec3(1.0, 0.0, 1.0f)); // magenta: measurement integrated.
}
}
else
{
int idx = int(texture(index_map, img_coords.xy)) - 1;
if(idx == gl_VertexID) // ensure that surfel is closest visible surfel.
{
// if not matching; reduce confidence...
// If not compatible, lower the surfel's confidence and do nothing else. After an update pass, surfels that are too old or whose confidence is too low are removed.
update_confidence = log(p_unstable/(1.0 - p_unstable));
vs_out.color_weight_count.x = pack(vec3(0.0, 1.0, 1.0f));
}
}
}
else if(visible && inside && false)
{
update_confidence = log_unstable;
vs_out.color_weight_count.x = pack(vec3(1.0, 0.0, 0.0f));
// we have a visible surfel without associated measurement => adjust radius to cover pixel
int idx = int(texture(index_map, img_coords.xy)) - 1;
if(idx == gl_VertexID && img_coords.y > 10) // ensure that surfel is closest visible surfel.
{
//vs_out.position_radius.w = min(old_radius, get_radius(vertex, normal, pixel_size));
}
}
// In short: if the label at the same position has changed, the surfel is assumed to lie on a dynamic object, and the semantic-consistency term in the confidence update accelerates its confidence decay.
update_confidence = update_confidence - penalty;
// static state bayes filter (see Thrun et al., Probabilistic Robotics, p. 286):
if(use_stability)
vs_out.normal_confidence.w = min(old_confidence + update_confidence - log_prior, upper_stability_bound);
else
vs_out.normal_confidence.w = old_confidence;
if(vs_out.normal_confidence.w < log_unstable && use_stability) vs_out.valid = false;
}
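The confidence handling above is a static-state binary Bayes filter in log-odds form. Per update, with angle deviation $a$ and distance deviation $d$:

$$p = \operatorname{clamp}\!\left(p_{\text{stable}}\, e^{-a^2/\sigma_a^2}\, e^{-d^2/\sigma_d^2},\ p_{\text{unstable}},\ 1\right), \qquad l_t = l_{t-1} + \log\frac{p}{1-p} - l_{\text{prior}} - \text{penalty}$$

where penalty is 1 exactly when the surfel's stored label is a movable class that disagrees with the current measurement; this is SuMa++'s semantic-consistency term, and surfels whose log-odds drop below $l_{\text{unstable}}$ are invalidated.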
3.5 Initialization of the ICP part
objective_.reset(new Frame2Model(params));
Frame2Model::Frame2Model(const rv::ParameterList& params)
: JtJJtf_blend_(2, 8, TextureFormat::RGBA_FLOAT),
fbo_blend_(2, 8) // blending for matrix comp.
{
...
program_blend_.attach(GlShader::fromCache(ShaderType::VERTEX_SHADER, "shader/Frame2Model_jacobians.vert"));
program_blend_.attach(GlShader::fromCache(ShaderType::GEOMETRY_SHADER, "shader/Frame2Model_jacobians.geom"));
program_blend_.attach(GlShader::fromCache(ShaderType::FRAGMENT_SHADER, "shader/Frame2Model_jacobians.frag"));
program_blend_.link();
program_blend_.setUniform(GlUniform<int32_t>("vertex_model", 0));
program_blend_.setUniform(GlUniform<int32_t>("normal_model", 1));
program_blend_.setUniform(GlUniform<int32_t>("vertex_data", 2));
program_blend_.setUniform(GlUniform<int32_t>("normal_data", 3));
program_blend_.setUniform(GlUniform<Eigen::Matrix4f>("pose", Eigen::Matrix4f::Identity()));
program_blend_.setUniform(GlUniform<int32_t>("entries_per_kernel", entriesPerKernel_));
updateParameters();
}
3.5.1 Frame2Model_jacobians.geom
layout(points) in;
layout(points, max_vertices = 16) out;
uniform sampler2DRect vertex_model;
uniform sampler2DRect normal_model;
uniform sampler2DRect vertex_data;
uniform sampler2DRect normal_data;
uniform sampler2DRect semantic_model; // [0,width]
uniform sampler2DRect semantic_data;
uniform float distance_thresh;
uniform float angle_thresh;
uniform mat4 pose;
uniform float fov_up;
uniform float fov_down;
uniform int entries_per_kernel;
const float pi = 3.14159265358979323846f;
const float inv_pi = 0.31830988618379067154f;
const float pi_2 = 1.57079632679;
uniform int iteration;
uniform int weight_function; // 0 - none, 1 - huber, 2 - turkey, 3 - stability.
uniform float factor;
uniform float cutoff_threshold;
#include "shader/color_map.glsl"
in VS_OUT {
vec2 texCoords;
} gs_in[];
out vec3 values;
vec2 mat_coords(int i, int j)
{
// keep in mind that we only have OpenGLs coord system...
return vec2(2.0 * (j + 0.5) / 2.0 - 1.0, 2.0 * (i + 0.5) / 8.0 - 1.0);
}
vec2 project2model(vec4 vertex)
{
vec2 tex_dim = textureSize(vertex_model);
float fov = abs(fov_up) + abs(fov_down);
float depth = length(vertex.xyz);
float yaw = atan(vertex.y, vertex.x);
float pitch = -asin(vertex.z / depth); // angle = acos((0,0,1) * p/||p||) - pi/2 = pi/2 - asin(x) + pi/2
float x = 0.5 * ((-yaw * inv_pi) + 1.0); // in [0, 1]
float y = 1.0 - (degrees(pitch) + fov_up) / fov; // in [0, 1]
return vec2(x, y)*tex_dim; // [0, w], [0, h]
}
void main()
{
vec2 tex_dim = textureSize(vertex_data);
vec2 model_dim = textureSize(vertex_model);
bool has_inlier = false;
// Idea: compute aggregated values, then submit values.
// store Jacobian in column-major order; store J^T*f in last row.
vec3 temp[16];
for(int i = 0; i < 16; ++i) temp[i] = vec3(0);
for(int e = 0; e < entries_per_kernel; ++e)
{
vec2 texCoords = gs_in[0].texCoords + vec2(e, 0);
if(texCoords.x >= tex_dim.x || texCoords.y >= tex_dim.y) continue;
vec2 img_coords = texCoords.xy / tex_dim;
// position and normal of the scan point
float e_d = texture(vertex_data, texCoords).w + texture(normal_data, texCoords).w;
vec4 v_d = pose * vec4(texture(vertex_data, texCoords).xyz, 1.0);
vec4 n_d = pose * vec4(texture(normal_data, texCoords).xyz, 0.0); // assuming non-scaling transform
bool inlier = false;
vec2 idx = project2model(v_d);
if(idx.x < 0 || idx.x >= model_dim.x || idx.y < 0 || idx.y >= model_dim.y)
{
e_d = 0.0f;
}
// position and normal of the map point
float e_m = texture(vertex_model, idx).w + texture(normal_model, idx).w;
vec3 v_m = texture(vertex_model, idx).xyz;
vec3 n_m = texture(normal_model, idx).xyz;
// Reminder: use ifs instead of if(...) return; to avoid problems with Intel cpu.
if((e_m > 1.5f) && (e_d > 1.5f))
{
has_inlier = true;
bool inlier = true;
if(length(v_m.xyz - v_d.xyz) > distance_thresh) inlier = false;
if(dot(n_m.xyz, n_d.xyz) < angle_thresh) inlier = false;
float residual = (dot(n_m.xyz, (v_d.xyz - v_m.xyz)));
vec3 n = n_m;
vec3 cp = cross(v_d.xyz, n_m.xyz);
float weight = 1.0;
// pick the weight for the configured weighting scheme
if((weight_function == 4 || weight_function == 1))
{
// huber weighting.
if(abs(residual) > factor)
{
// with the default Huber factor of 1, once |residual| exceeds factor the weight becomes:
weight = factor / abs(residual);
}
}
else if(weight_function == 2 && iteration > 0)
{
// Tukey bi-squared weighting:
if(abs(residual) > factor)
{
weight = 0;
}
else
{
float alpha = residual / factor;
weight = (1.0 - alpha * alpha);
weight = weight * weight;
}
}
// use semantic information during ICP
float data_label = texture(semantic_data, texCoords).x * 255.0;
float data_label_prob = texture(semantic_data, texCoords).w;
float model_label = texture(semantic_model, idx).x * 255.0;
if( model_label == car.w || model_label == bicycle.w ||
model_label == bus.w || model_label == motorcycle.w||
model_label == truck.w|| model_label == other_vehicle.w||
model_label == person.w||
model_label == bicyclist.w || model_label == motorcyclist.w)
{
// reweight by the semantic consistency between the map point and the scan point
if(round(data_label) != round(model_label))
weight *= (1 - data_label_prob);
else
weight *= data_label_prob;
}
if(inlier)
{
temp[0] += weight * n.x * n;
temp[1] += weight * n.y * n;
temp[2] += weight * n.z * n;
temp[3] += weight * cp.x * n;
temp[4] += weight * cp.y * n;
temp[5] += weight * cp.z * n;
temp[6] += weight * n.x * cp;
temp[7] += weight * n.y * cp;
temp[8] += weight * n.z * cp;
temp[9] += weight * cp.x * cp;
temp[10] += weight * cp.y * cp;
temp[11] += weight * cp.z * cp;
// compute J^T * W * f => 2 vertices
// temp[12] and temp[13] below accumulate exactly that product
temp[12] += weight * residual * n;
temp[13] += weight * residual * cp;
temp[14].x += 1.0f; // terms in error function (inlier + outlier)
temp[14].y += weight * residual * residual; // residual
temp[15].x += weight * residual * residual; // inlier residual
}
else
{
// was cut-off due to gross outlier rejection.
temp[14].x += 1.0f; // terms.
temp[14].y += weight * residual * residual; // "outlier" residual.
temp[14].z += 1.0f;// num_outliers.
}
}
else
{
temp[15].y += 1.0; // invalid count.
}
}
// SUBMISSION:
if(has_inlier)
{
for(int i = 0; i < 6; ++i)
{
gl_Position = vec4(mat_coords(i, 0), 0.0, 1.0);
values = temp[i];
EmitVertex();
EndPrimitive();
gl_Position = vec4(mat_coords(i, 1), 0.0, 1.0);
values = temp[i + 6];
EmitVertex();
EndPrimitive();
}
}
gl_Position = vec4(mat_coords(6, 0), 0.0, 1.0);
values = temp[12];
EmitVertex();
EndPrimitive();
gl_Position = vec4(mat_coords(6, 1), 0.0, 1.0);
values = temp[13];
EmitVertex();
EndPrimitive();
gl_Position = vec4(mat_coords(7, 0), 0.0, 1.0);
values = temp[14];
EmitVertex();
EndPrimitive();
gl_Position = vec4(mat_coords(7, 1), 0.0, 1.0);
values = temp[15];
EmitVertex();
EndPrimitive();
}
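What the shader accumulates are the pieces of one Gauss-Newton step for the point-to-plane objective: temp[0..11] hold the 6x6 matrix $J^\top W J$ (as vec3 columns), temp[12..13] hold $J^\top W f$, and temp[14..15] hold inlier/outlier statistics. For a correspondence with data point $v_d$, map point $v_m$, and map normal $n_m$:

$$r = n_m^\top (v_d - v_m), \qquad J = \begin{bmatrix} n_m \\ v_d \times n_m \end{bmatrix}, \qquad \Big(\sum_i w_i J_i J_i^\top\Big) \delta = -\sum_i w_i\, r_i\, J_i$$

with the robust weights used above (factor $= k$):

$$w_{\text{Huber}}(r) = \begin{cases} 1 & |r| \le k \\ k/|r| & |r| > k \end{cases} \qquad w_{\text{Tukey}}(r) = \begin{cases} \big(1 - (r/k)^2\big)^2 & |r| \le k \\ 0 & |r| > k \end{cases}$$

and, for movable classes, the semantic weight additionally multiplying in the label-agreement probability: $w \mathrel{*}= p$ on agreement, $w \mathrel{*}= (1-p)$ on disagreement.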
4. The RangeNet++ semantic model in SuMa++
// initialize the rangenet
net = std::shared_ptr<RangenetAPI> (new RangenetAPI(params_));
// the label map maps the sequential class indices (0-19) to the corresponding original label ids (values)
label_map_ = net->getLabelMap();
// the color map is indexed by label index and stores the corresponding color
color_map_ = net->getColorMap();
4.1 Deserializing the model engine
NetTensorRT::NetTensorRT(const std::string& model_path)
: Net(model_path), _engine(0), _context(0) {
// set default verbosity level
verbosity(_verbose);
// Try to open the model
std::cout << "Trying to open model" << std::endl;
// generate trt path from model path
std::string engine_path = model_path + "/model.trt";
// try to deserialize the engine
try {
// check whether the file exists
// file_ifstream(engine_path.c_str());
// read the model into a buffer
// gieModelStream << file_ifstream.rdbuf();
// then deserialize it to obtain the engine:
// _engine = infer->deserializeCudaEngine(modelMem, modelSize);
deserializeEngine(engine_path);
} catch (std::exception e) {
std::cout << "Could not deserialize TensorRT engine. " << std::endl
<< "Generating from sratch... This may take a while..."
<< std::endl;
// destroy crap from engine
// if (_engine) _engine->destroy();
if (_engine) delete _engine;
} catch (...) {
throw std::runtime_error("Unknown TensorRT exception. Giving up.");
}
// In theory, the block below is never executed in SuMa++
// if there is no engine, try to generate one from onnx
if (!_engine) {
// generate path
std::string onnx_path = model_path + "/model.onnx";
// generate engine
generateEngine(onnx_path);
// save engine
serializeEngine(engine_path);
}
// prepare buffers for io :)
prepareBuffer();
CUDA_CHECK(cudaStreamCreate(&_cudaStream));
} // namespace segmentation
4.2 Reading colors, labels, and other parameters from the config files
Net::Net(const std::string& model_path)
: _model_path(model_path), _verbose(true) {
// set default verbosity level
verbosity(_verbose);
// Try to get the config file as well
// path of the architecture config
std::string arch_cfg_path = _model_path + "/arch_cfg.yaml";
try {
// read the model configuration parameters
arch_cfg = YAML::LoadFile(arch_cfg_path);
} catch (YAML::Exception& ex) {
throw std::runtime_error("Can't open cfg.yaml from " + arch_cfg_path);
}
// Assign fov_up and fov_down from arch_cfg
_fov_up = arch_cfg["dataset"]["sensor"]["fov_up"].as<double>();
_fov_down = arch_cfg["dataset"]["sensor"]["fov_down"].as<double>();
// path of the KITTI label config file
std::string data_cfg_path = _model_path + "/data_cfg.yaml";
try {
// read the label and color configuration
data_cfg = YAML::LoadFile(data_cfg_path);
} catch (YAML::Exception& ex) {
throw std::runtime_error("Can't open cfg.yaml from " + data_cfg_path);
}
// Get label dictionary from yaml cfg
YAML::Node color_map;
try {
// read the colors
color_map = data_cfg["color_map"];
} catch (YAML::Exception& ex) {
std::cerr << "Can't open one the label dictionary from cfg in " + data_cfg_path
<< std::endl;
throw ex;
}
// Generate string map from xentropy indexes (that we'll get from argmax)
YAML::const_iterator it;
for (it = color_map.begin(); it != color_map.end(); ++it) {
// Get label and key
// take the numeric key in front
int key = it->first.as<int>(); // <- key
Net::color color = std::make_tuple(
// take the corresponding color components: b, g, r
static_cast<u_char>(color_map[key][0].as<unsigned int>()),
static_cast<u_char>(color_map[key][1].as<unsigned int>()),
static_cast<u_char>(color_map[key][2].as<unsigned int>()));
_color_map[key] = color;
}
// Get learning class labels from yaml cfg
YAML::Node learning_class;
try {
// Get the classes for the sequentially ordered labels (some raw labels are already merged here, e.g. moving-bicyclist with bicyclist, and moving-person with person)
// This shows the semantics could in principle pick out moving targets, but the authors clearly do not use that as a moving/static split in the optimization; they treat them as one class.
learning_class = data_cfg["learning_map_inv"];
} catch (YAML::Exception& ex) {
std::cerr << "Can't open one the label dictionary from cfg in " + data_cfg_path
<< std::endl;
throw ex;
}
// get the number of classes
_n_classes = learning_class.size();
// remapping the colormap lookup table
// Store the color info into the _argmax_to_rgb[key] array, effectively in sequential order, and index the corresponding class label ids via _lable_map
_lable_map.resize(_n_classes);
for (it = learning_class.begin(); it != learning_class.end(); ++it) {
int key = it->first.as<int>(); // <- key
_argmax_to_rgb[key] = _color_map[learning_class[key].as<unsigned int>()];
_lable_map[key] = learning_class[key].as<unsigned int>();
}
// everything below reads values from arch_cfg into the corresponding members
// get image size
_img_h = arch_cfg["dataset"]["sensor"]["img_prop"]["height"].as<int>();
_img_w = arch_cfg["dataset"]["sensor"]["img_prop"]["width"].as<int>();
_img_d = 5; // range, x, y, z, remission
// get normalization parameters
YAML::Node img_means, img_stds;
try {
img_means = arch_cfg["dataset"]["sensor"]["img_means"];
img_stds = arch_cfg["dataset"]["sensor"]["img_stds"];
} catch (YAML::Exception& ex) {
std::cerr << "Can't open one the mean or std dictionary from cfg"
<< std::endl;
throw ex;
}
// fill in means from yaml node
for (it = img_means.begin(); it != img_means.end(); ++it) {
// Get value
float mean = it->as<float>();
// Put in indexing vector
_img_means.push_back(mean);
}
// fill in stds from yaml node
for (it = img_stds.begin(); it != img_stds.end(); ++it) {
// Get value
float std = it->as<float>();
// Put in indexing vector
_img_stds.push_back(std);
}
}
4.3 Running the semantic module and storing the predictions in a semantic container that serves as the semantic mask
std::vector<std::vector<float>> NetTensorRT::infer(const std::vector<float>& scan, const uint32_t& num_points) {
// check if engine is valid
// is the engine usable?
if (!_engine) {
throw std::runtime_error("Invaild engine on inference.");
}
// start inference
// start timing the inference
if (_verbose) {
tic();
std::cout << "Inferring with TensorRT" << std::endl;
tic();
}
// project point clouds into range image
// Project the 3D points (x,y,z,w) down to 2D (u,v) for inference. The key is (u,v), i.e. sorted_proj_ys[i] * _img_w + sorted_proj_xs[i], and the value is inputs[i] = {ranges[idx], xs[idx], ys[idx], zs[idx], intensitys[idx]}
std::vector<std::vector<float>> projected_data = doProjection(scan, num_points);
// time spent on the 3D-to-2D projection
if (_verbose) {
std::cout << "Time for projection: "
<< toc() * 1000
<< "ms" << std::endl;
tic();
}
// put in buffer using position
// offsets used later when pulling the inference results back out
int channel_offset = _img_h * _img_w;
bool all_zeros = false;
std::vector<int> invalid_idxs;
for (uint32_t pixel_id = 0; pixel_id < projected_data.size(); pixel_id++){
// check whether the data is valid
// check if the pixel is invalid
all_zeros = std::all_of(projected_data[pixel_id].begin(), projected_data[pixel_id].end(), [](float v) { return v == 0.0f; });
if (all_zeros) {
invalid_idxs.push_back(pixel_id);
}
for (int i = 0; i < _img_d; i++) {
// normalize the data
// preprocess: normalize the data so it is ready for inference
if (!all_zeros) {
projected_data[pixel_id][i] = (projected_data[pixel_id][i] - this->_img_means[i]) / this->_img_stds[i];
}
int buffer_idx = channel_offset * i + pixel_id;
// write into the host buffer for inference
((float*)_hostBuffers[_inBindIdx])[buffer_idx] = projected_data[pixel_id][i];
}
}
// clock now
// time spent on the preprocessing above
if (_verbose) {
std::cout << "Time for preprocessing: "
<< toc() * 1000
<< "ms" << std::endl;
tic();
}
// execute inference
// The inference itself runs on CUDA; I treat it as a black box (don't care how it runs, only what the result looks like)
// step 1: copy data host (CPU) -> device (GPU)
CUDA_CHECK(
cudaMemcpyAsync(_deviceBuffers[_inBindIdx], _hostBuffers[_inBindIdx],
getBufferSize(_engine->getBindingDimensions(_inBindIdx),
_engine->getBindingDataType(_inBindIdx)),
cudaMemcpyHostToDevice, _cudaStream));
if (_verbose) {
CUDA_CHECK(cudaStreamSynchronize(_cudaStream));
std::cout << "Time for copy in: "
<< toc() * 1000
<< "ms" << std::endl;
tic();
}
// "step2.执行推理"
// _context->enqueue(1, &_deviceBuffers[_inBindIdx], _cudaStream, nullptr);
_context->enqueueV2(&_deviceBuffers[_inBindIdx], _cudaStream, nullptr);
// inference finished; take the time
if (_verbose) {
CUDA_CHECK(cudaStreamSynchronize(_cudaStream));
std::cout << "Time for inferring: "
<< toc() * 1000
<< "ms" << std::endl;
tic();
}
// copy the inference results back
// step 3: copy data device (GPU) -> host (CPU)
CUDA_CHECK(
cudaMemcpyAsync(_hostBuffers[_outBindIdx], _deviceBuffers[_outBindIdx],
getBufferSize(_engine->getBindingDimensions(_outBindIdx),
_engine->getBindingDataType(_outBindIdx)),
cudaMemcpyDeviceToHost, _cudaStream));
// synchronize the stream
CUDA_CHECK(cudaStreamSynchronize(_cudaStream));
// copy-back time
if (_verbose) {
std::cout << "Time for copy back: "
<< toc() * 1000
<< "ms" << std::endl;
tic();
}
// take the data out
// store the inference results into a 2D range image
std::vector<std::vector<float>> range_image(channel_offset);
// The first dimension is unchanged, but the second dimension changes from (range, x, y, z, intensity) to (class0, class1, ..., classN): 20 classes, indices 0-19
for (int pixel_id = 0; pixel_id < channel_offset; pixel_id++){
for (int i = 0; i < _n_classes; i++) {
int buffer_idx = channel_offset * i + pixel_id;
range_image[pixel_id].push_back(((float*)_hostBuffers[_outBindIdx])[buffer_idx]);
}
}
// log the elapsed time
if (_verbose) {
std::cout << "Time for taking the data out: "
<< toc() * 1000
<< "ms" << std::endl;
tic();
}
// the invalid points found earlier are also written out
// set invalid pixels
for (int idx : invalid_idxs) {
range_image[idx] = invalid_output;
}
// Back-project all the processed 2D semantic information into the 3D container semantic_scan, which serves as the semantic mask for the rest of the pipeline
// unprojection, labelling raw point clouds
std::vector<std::vector<float>> semantic_scan;
for (uint32_t i = 0 ; i < num_points; i++) {
semantic_scan.push_back(range_image[proj_ys[i] * _img_w + proj_xs[i]]);
}
if (_verbose) {
std::cout << "Time for unprojection: "
<< toc() * 1000
<< "ms" << std::endl;
std::cout << "Time for the whole: "
<< toc() * 1000
<< "ms" << std::endl;
}
// The returned data has shape (num_points, 20); the 20 values are the per-class probabilities for each point
return semantic_scan;
}
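Note that the host buffers are packed channel-major (CHW): channel i of pixel p lives at index channel_offset * i + p, with channel_offset = _img_h * _img_w. A minimal standalone sketch of that indexing (the sizes are illustrative only, not the actual network dimensions):
#include <cstddef>
#include <vector>

int main() {
  const int W = 2048, H = 64, D = 5, C = 20;  // illustrative, not the actual dims
  const std::size_t channel_offset = static_cast<std::size_t>(W) * H;
  std::vector<float> host_in(D * channel_offset);   // input buffer, CHW
  std::vector<float> host_out(C * channel_offset);  // output buffer, CHW

  const std::size_t pixel_id = 1234;
  for (int i = 0; i < D; ++i)  // write the D input channels of one pixel
    host_in[channel_offset * i + pixel_id] = 0.0f;

  std::vector<float> scores(C);  // read back that pixel's C class scores, as infer() does
  for (int c = 0; c < C; ++c)
    scores[c] = host_out[channel_offset * c + pixel_id];
  return 0;
}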