Note: the visual SLAM algorithm used is ORBSLAM2, and the semantic segmentation network is deeplabv2.
Source Code Analysis
GitHub: Semantic_Mapping_on_ORBSLAM2
Server
The source is deeplabv2_server/server.py. The server receives an image from the client as a byte string, runs semantic segmentation on it with probs = inference.runSegModel(img), and sends the segmentation result back to the client. In both directions, each message is framed as a 16-byte, space-padded ASCII length header followed by the raw payload.
Note: the segmentation model can be swapped for a different one as needed; just make sure its runtime environment is configured accordingly.
#!/usr/bin/env python
# coding=utf-8
from socket import *
import cv2
import matplotlib.pyplot as plt
import numpy as np
import inference

HOST = '127.0.0.1'
PORT = 8080
ADDR = (HOST, PORT)

def recvall(sock, count):
    buf = b''  # buf is a bytes object (byte string)
    while count:
        # Receive data from the TCP socket; count is the number of bytes still expected.
        newbuf = sock.recv(count)
        if not newbuf:
            return None
        buf += newbuf
        count -= len(newbuf)
        print(count)
    return buf

# Initialize the semantic segmentation model
inference.init()

# Create a TCP socket in the AF_INET address family
with socket(AF_INET, SOCK_STREAM) as tcpSerSock:
    # Bind the IP address and port
    tcpSerSock.bind(ADDR)
    # Listen for incoming connection requests
    tcpSerSock.listen(5)
    print("Listen on port", PORT)
    while True:
        # accept() blocks until a client connects
        tcpClientSock, addr = tcpSerSock.accept()
        print("The client:", addr, "is connecting.")
        with tcpClientSock:
            # Keep talking to this client until it disconnects or crashes
            while True:
                # Receive the image size (16-byte, space-padded ASCII header)
                count = recvall(tcpClientSock, 16)
                if not count:
                    break
                print("The file size:", int(count))
                # Receive the image itself
                stringData = recvall(tcpClientSock, int(count))
                if not stringData:
                    break
                # Convert the byte string into a uint8 array
                data = np.frombuffer(stringData, np.uint8)
                # Decode the uint8 array into an image
                img = cv2.imdecode(data, cv2.IMREAD_COLOR)
                print(img.shape)
                # Run semantic segmentation; returns a "probability map"
                probs = inference.runSegModel(img)
                print(type(probs))   # <class 'numpy.ndarray'>
                print(probs.shape)
                print(probs.dtype)   # float32 elements
                # print(np.max(probs, 0))
                # Serialize the probability map to raw bytes
                stringProbs = probs.tobytes()
                # Note: this is the client's socket
                # Send the length of the byte string first
                tcpClientSock.sendall(str.encode(str(len(stringProbs)).ljust(16)))
                # Then send the byte string itself
                tcpClientSock.sendall(stringProbs)
        # The client has disconnected
        print("client", addr, "is disconnected.")
Client
The source is src/ImageSeg.cc. The ImageSeg class is dedicated to the image semantic segmentation step: ImageSeg::connectServer() establishes the connection to the server, and ImageSeg::handleImage(const cv::Mat& imRGB) sends an image to the server and receives the segmentation result it returns.
#include "ImageSeg.h"
namespace ORB_SLAM2
{
// public
ImageSeg::ImageSeg():mClientSockfd(-1), mIsFirstImage(true){}
ImageSeg::~ImageSeg(){
this->closeClientSockfd();
}
bool ImageSeg::connectServer()
{
int client_sockfd = socket(AF_INET, SOCK_STREAM, 0);
if (client_sockfd == -1) {
perror("Socket error");
return false;
}
struct sockaddr_in client_addr;
bzero(&client_addr, sizeof(client_addr));
client_addr.sin_family = AF_INET;
client_addr.sin_port = htons(Global::PORT);
inet_pton(AF_INET,Global::HOSTADDR.c_str(), &client_addr.sin_addr.s_addr);
int ret = connect(client_sockfd, (struct sockaddr*)&client_addr, sizeof(client_addr));
if (ret == -1) {
perror("Connect error");
return false;
}
char ipbuf[128];
printf("Connect successfully! client iP: %s, port: %d\n", inet_ntop(AF_INET, &client_addr.sin_addr.s_addr, ipbuf,
sizeof(ipbuf)), ntohs(client_addr.sin_port));
mClientSockfd = client_sockfd;
return true;
}
void ImageSeg::handleImage(const cv::Mat& imRGB)
{
char* buf = new char[BUFFERSIZE];
int bufLength = this->transMatToCharArray(buf, imRGB);
cout << bufLength << endl;
// int转char数组
char lenStrSend[16];
stringstream ss;
ss << bufLength;
ss >> lenStrSend;
for (int i = strlen(lenStrSend); i < sizeof(lenStrSend) ; i++) {
lenStrSend[i] = ' ';
}
send(mClientSockfd, lenStrSend, sizeof(lenStrSend), 0);
send(mClientSockfd, buf, bufLength, 0);
delete [] buf;
char lenStrRec[16];
// 阻塞读取
int len = recv(mClientSockfd, lenStrRec, sizeof(lenStrRec), 0);
if (!isRecvSuccess(len)) {
return;
}
int size;
stringstream stream(lenStrRec);
stream >> size;
char* recBuf = new char[BUFFERSIZE];
len = 0;
do {
int l = recv(mClientSockfd, recBuf+len, BUFFERSIZE-len, 0);
if (!isRecvSuccess(l)) {
return;
}
len += l;
}while(len < size);
for (int c = 0; c < Global::CHANNELS; c++) {
for (int h = 0; h < Global::HEIGHT; h++) {
for (int w = 0; w < Global::WIDTH; w++) {
int index = c*Global::HEIGHT*Global::WIDTH + h*Global::WIDTH + w;
char c_heading[4];
for (int i = 0; i < 4; i++) { // float四个字节
c_heading[i] = recBuf[index*4+i];
}
if (mIsFirstImage) {
mProbMap.push_back(*((float*)(c_heading)));
}else {
mProbMap[index] = *((float*)(c_heading));
}
}
}
}
mIsFirstImage = false;
delete [] recBuf;
}
void ImageSeg::closeClientSockfd()
{
if (mClientSockfd != -1) {
close(mClientSockfd);
mClientSockfd = -1;
}
}
const vector<float>& ImageSeg::getProbMap()
{
return mProbMap;
}
// private
bool ImageSeg::isRecvSuccess(int len)
{
if (len == -1) {
perror("Read error");
}else if (len == 0){
printf("Without receiving data's Length from server.");
}else{
return true;
}
this->closeClientSockfd();
return false;
}
int ImageSeg::transMatToCharArray(char* modelImage, const cv::Mat& imRGB)
{
// 将cv::Mat数据编码成数据流
std::vector<uchar> buff;
cv::imencode(".png", imRGB, buff);
memset(modelImage, 0, BUFFERSIZE);
memcpy(modelImage, reinterpret_cast<char*>(&buff[0]), buff.size());
return buff.size();
}
}
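For reference, a hypothetical standalone use of ImageSeg (outside of ORB-SLAM2) would look roughly like the sketch below; frame.png is a placeholder path, and the size of the probability map follows from the Global constants used in handleImage().
#include <iostream>
#include <opencv2/opencv.hpp>
#include "ImageSeg.h"

int main()
{
    ORB_SLAM2::ImageSeg seg;
    if (!seg.connectServer())   // the Python server must already be listening
        return 1;

    cv::Mat imRGB = cv::imread("frame.png", cv::IMREAD_COLOR);   // placeholder image path
    seg.handleImage(imRGB);     // send the image and block until the probability map is back

    // Flat probability map of CHANNELS * HEIGHT * WIDTH float values
    const std::vector<float>& probMap = seg.getProbMap();
    std::cout << "received " << probMap.size() << " probabilities" << std::endl;
    return 0;   // the destructor closes the socket
}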
Using the ImageSeg class in ORBSLAM2
- When semantic mapping is initialized, a connection to the server is established.
PointCloudMappingClient::PointCloudMappingClient(string savePCDPath, double thprob, double thdepth):
    mCx(0), mCy(0), mFx(0), mFy(0), mbShutdown(false), mbFinish(false),
    mSavePCDPath(savePCDPath), mThdepth(thdepth), mThprob(thprob)
{
    this->initColorMap();
    mImgSeg.connectServer();  // connect to the segmentation server
    mPointCloud = boost::make_shared<PointCloud>();  // use boost::make_shared<> ?
    viewerThread = std::make_shared<std::thread>(&PointCloudMappingClient::showPointCloud, this);
}
- When a point cloud is generated, the image is segmented and the returned probability map is kept in a flat (1D) vector; a sketch of how that flat map can be indexed follows the code below.
void PointCloudMappingClient::generatePointCloud(const cv::Mat& imRGB, cv::Mat& imD, const cv::Mat& pose, int nId)
{
    std::cout << "Converting image: " << nId << std::endl;
    std::chrono::steady_clock::time_point t1 = std::chrono::steady_clock::now();
    mImgSeg.handleImage(imRGB);                    // send the image to the server for segmentation
    vector<float> probMap = mImgSeg.getProbMap();  // fetch the probability map returned by the server
    this->mergePointCloudFromImage(probMap, imRGB, imD, pose);  // merge this frame's points into the cloud
    std::chrono::steady_clock::time_point t2 = std::chrono::steady_clock::now();
    double t = std::chrono::duration_cast<std::chrono::duration<double> >(t2 - t1).count();
    std::cout << "Cost = " << t << std::endl;
}
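mergePointCloudFromImage() is not reproduced here. As a minimal sketch of how the flat probability map can be consumed, assuming the same c*HEIGHT*WIDTH + h*WIDTH + w layout that handleImage() writes (the function below is illustrative only and takes the Global dimensions as arguments):
#include <vector>

// Most probable class for pixel (h, w), given the flat channels x height x width
// probability map returned by ImageSeg::getProbMap().
int argmaxLabel(const std::vector<float>& probMap, int channels, int height, int width, int h, int w)
{
    int best = 0;
    float bestProb = probMap[h * width + w];   // channel 0
    for (int c = 1; c < channels; c++) {
        float p = probMap[c * height * width + h * width + w];
        if (p > bestProb) {
            bestProb = p;
            best = c;
        }
    }
    return best;
}
The mThprob value passed to the constructor is presumably compared against this maximum probability when deciding whether to keep a semantic label for a point, but that logic lives inside mergePointCloudFromImage().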
- For how the semantic map itself is constructed, see: 基于ORB-SLAM2的语义地图构建 (Semantic Map Construction Based on ORB-SLAM2).