C++实现客户端与服务器的通信(三)：在远程服务器中处理本地摄像头数据

最新推荐文章于 2024-08-28 09:16:55 发布

wwww1244

最新推荐文章于 2024-08-28 09:16:55 发布

阅读量3.6k

点赞数 3

分类专栏：深度学习实践

本文链接：https://blog.csdn.net/wwww1244/article/details/80976560

版权

深度学习实践专栏收录该内容

10 篇文章 0 订阅

订阅专栏

这次我们要走得稍微远一点，我需要直接从本地摄像头中读取视频流、传送到服务器、经过服务器上的dlib人脸检测算法、返回bounding box并在本地显示。

不过，有了前面数据传输的基础，只要在合适的位置添加一些程序就可以了。

一、图像的序列化

当我们从摄像头中读取出Mat类型的图像数据（img）后，需要首先对其进行序列化使其能够在http下传输，需要借助opencv的imencode函数，程序如下：

// JPEG-compress the frame `img` into an in-memory byte buffer.
std::vector<unsigned char> buffer;
imencode(".jpg", img, buffer);
// Base64-encode the bytes directly from the buffer so they can be sent as
// text; no intermediate std::string copy of the JPEG data is needed.
string base64_src = base64_encode(reinterpret_cast<BYTE const*>(buffer.data()), buffer.size());

图像的解码需要先解码base64字符串，存放到vector<BYTE>中，然后使用opencv的imdecode函数转换为Mat格式，程序如下：

// Turn the received base64 text back into the raw encoded-image bytes.
std::vector<BYTE> str_decoded_byte = base64_decode(str_encoded);
// Decode those bytes into an OpenCV Mat, requesting a colour image.
Mat mat = imdecode(str_decoded_byte, CV_LOAD_IMAGE_COLOR);

二、bounding box的序列化

这一步本来准备用cJSON的，但是后来想到人脸检测的标注数据格式比较简单，例如程序检测到了一张人脸，那么就可以返回一个这样的字符串：

"127 131 200 204"

上述四个数字分别表示人脸的左、上、右、下位置的像素坐标。如果程序检测到多张人脸，也可以用类似方法：

"127 131 200 204 235 87 309 156"

然后只要通过一个split函数就可以将数据分隔开了。C++的string库并没有提供标准的split函数，所以我们要自己定义一个：

// Splits `s` at every occurrence of the delimiter `c` and appends the pieces
// to `v` (existing contents of `v` are kept).
//
//   s  text to split
//   v  output vector that receives the tokens, in order
//   c  delimiter; may be longer than one character
//
// Adjacent delimiters yield empty tokens, while a trailing delimiter does not
// produce a final empty token. An empty `s` appends nothing.
// An empty delimiter is treated as "no delimiter": a non-empty `s` is appended
// as a single token.
void split(const std::string& s, std::vector<std::string>& v, const std::string& c)
{
  // find("") matches at the current position without ever advancing, so the
  // scanning loop below would never terminate for an empty delimiter.
  if (c.empty())
  {
    if (!s.empty())
      v.push_back(s);
    return;
  }

  std::string::size_type token_start = 0;
  for (std::string::size_type hit = s.find(c);
       hit != std::string::npos;
       hit = s.find(c, token_start))
  {
    v.push_back(s.substr(token_start, hit - token_start));
    token_start = hit + c.size();  // resume just past the delimiter
  }

  // Whatever follows the last delimiter (if anything) is the final token.
  if (token_start != s.length())
    v.push_back(s.substr(token_start));
}

三、C++ dlib人脸检测算法

以前用conda或pip安装的dlib只有python接口，要使用C++接口还是需要源码安装。

github源码地址：https://github.com/davisking/dlib

在dlib根目录下依次运行以下指令：

# Create a separate build directory and work from inside it.
mkdir build
cd build
# Configure the project from the sources in the parent directory.
cmake ..
# Compile using the Release configuration.
cmake --build . --config Release
# Install the built library system-wide (needs root).
sudo make install
# Refresh the dynamic linker cache so the newly installed library is found.
sudo ldconfig

不出意外的话，就可以安装成功了。

dlib人脸检测算法需要先将Mat格式图片转换成dlib图片格式（dlib::array2d<bgr_pixel>），然后使用dlib人脸检测器做检测：

// Obtain dlib's frontal face detector.
dlib::frontal_face_detector detector = dlib::get_frontal_face_detector();
// Copy the OpenCV image `mat` (wrapped as a BGR cv_image) into a dlib image.
dlib::array2d<bgr_pixel> img;  
dlib::assign_image(img, dlib::cv_image<bgr_pixel>(mat));
// Run detection; each returned rectangle is one detected face.
std::vector<dlib::rectangle> dets = detector(img);

四、修改程序

然后，只要结合上述方法，对程序进行一些修改就可以了，修改后的程序如下：

client test.cpp：

#include <stdio.h>
#include <string.h>
#include <time.h>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>  
#include <opencv2/imgproc/imgproc.hpp>  
#include <opencv2/core/core.hpp>
#include <iostream>

#include "http.h"
#include "base64.h"

using namespace std;

void split(const std::string& s, std::vector<std::string>& v, const std::string& c);

// Client entry point.
//
// Reads frames from camera 0, shrinks each to half size, sends it to the
// server as a base64-encoded JPEG via HTTP POST, parses the reply as
// space-separated "left top right bottom" quadruples, draws the boxes on the
// shrunken frame and shows it in a window. Pressing Esc ends the loop.
//
// Returns 0 after Esc, or -1 if the camera cannot be opened or the reply does
// not contain a multiple of four fields.
int main(int argc, char *argv[])
{
  CurlHttp curl_http;
  string str_url = "http://10.108.233.26:8003";  // server address and port

  cv::VideoCapture cap(0);
  if(!cap.isOpened())
  {
    cout << "Something Wrong with the Camera!!" << endl;
    return -1;
  }

  cv::Mat frame, img;
  timeval start, end;
  while (1) {

    // Per-frame wall-clock timing starts here and ends after display.
    gettimeofday(&start, NULL);

    cap >> frame;
    if (frame.empty()) {
      cout << "No Image!!";
      continue;
    }

    // Downscale before sending; the boxes returned by the server are drawn on
    // this same downscaled image, so no coordinate rescaling is needed.
    float scale = 0.5;
    int width = frame.size().width * scale;
    int height = frame.size().height * scale;
    cv::Size size(width, height);
    cv::resize(frame, img, size);

    // JPEG-compress, then base64-encode directly from the byte buffer
    // (avoids copying every frame into a temporary std::string first).
    std::vector<unsigned char> buffer;
    cv::imencode(".jpg", img, buffer);
    string base64_src = base64_encode(reinterpret_cast<BYTE const*>(buffer.data()), buffer.size());

    string result;
    // NOTE(review): the return code is not inspected; its meaning is defined
    // in http.h, which is not visible here. A failed request currently just
    // leaves `result` as it was and the frame is shown without boxes.
    int res = curl_http.http_post(str_url.c_str(), base64_src.c_str(), &result);
    cout << "[Response]: " << result << '\n';

    // The reply is a flat list of integers, four per detected face.
    vector<string> str_bbox;
    split(result, str_bbox, " ");
    if (str_bbox.size() % 4 != 0) {
      cout << "Response Error!!";
      return -1;
    }
    int num_bbox = str_bbox.size() / 4;
    for(int i = 0; i < num_bbox; i++) {
      int left   = atoi(str_bbox[i*4].c_str());
      int top    = atoi(str_bbox[i*4 + 1].c_str());
      int right  = atoi(str_bbox[i*4 + 2].c_str());
      int bottom = atoi(str_bbox[i*4 + 3].c_str());
      // NOTE(review): the top edge is shifted down by 10 px; the reason is not
      // evident from this file - confirm it is intentional.
      cv::rectangle(img, cv::Point(left, top + 10), cv::Point(right, bottom), cv::Scalar(255, 0, 0), 2);
    }

    cv::imshow("new", img);
    if (cv::waitKey(5) == 27) {  // 27 == Esc
        break;
    }

    gettimeofday(&end, NULL);
    // Elapsed time for this iteration, in milliseconds.
    printf("[Time]: %f\n", (double)((end.tv_sec - start.tv_sec)*1000.0 + (end.tv_usec - start.tv_usec)/1000.0));
  }
  
  return 0;
}

// Splits `s` at every occurrence of the delimiter `c` and appends the pieces
// to `v` (existing contents of `v` are kept).
//
//   s  text to split
//   v  output vector that receives the tokens, in order
//   c  delimiter; may be longer than one character
//
// Adjacent delimiters yield empty tokens, while a trailing delimiter does not
// produce a final empty token. An empty `s` appends nothing.
// An empty delimiter is treated as "no delimiter": a non-empty `s` is appended
// as a single token.
void split(const std::string& s, std::vector<std::string>& v, const std::string& c)
{
  // find("") matches at the current position without ever advancing, so the
  // scanning loop below would never terminate for an empty delimiter.
  if (c.empty())
  {
    if (!s.empty())
      v.push_back(s);
    return;
  }

  std::string::size_type token_start = 0;
  for (std::string::size_type hit = s.find(c);
       hit != std::string::npos;
       hit = s.find(c, token_start))
  {
    v.push_back(s.substr(token_start, hit - token_start));
    token_start = hit + c.size();  // resume just past the delimiter
  }

  // Whatever follows the last delimiter (if anything) is the final token.
  if (token_start != s.length())
    v.push_back(s.substr(token_start));
}

server test.cpp：

#include <stdio.h>
#include <string.h>
#include <time.h>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>  
#include <opencv2/imgproc/imgproc.hpp>  
#include <opencv2/core/core.hpp>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/opencv.h>
#include <iostream>

#include "mongoose.h"
#include "base64.h"

using namespace std;
using namespace cv;
using namespace dlib;

int env_handler(struct mg_connection *conn);

// Server entry point: creates a mongoose server on port 8003, routes "/" to
// env_handler, then polls for requests indefinitely.
int main(int argc, char *argv[])
{
  struct mg_server* srv = mg_create_server(NULL);  // create the mongoose server
  mg_set_option(srv, "listening_port", "8003");    // listen on port 8003
  mg_add_uri_handler(srv, "/", env_handler);       // register the request callback
  printf("Starting on port %s ...\n", mg_get_option(srv, "listening_port"));

  for (;;) {
    mg_poll_server(srv, 100);  // poll timeout in milliseconds
  }

  // Not reached: the polling loop above has no exit.
  mg_destroy_server(&srv);

  return 0;
}


int env_handler(struct mg_connection *conn) 
{
  static dlib::frontal_face_detector detector = dlib::get_frontal_face_detector();
  static int counter = 0;
  counter++;

  const char * encoded_data = conn->content;  // 服务端收到的消息
  int encoded_len = conn->content_len;        // 服务端收到的消息长度
  string str_encoded(encoded_data, encoded_len);

  std::vector<BYTE> str_decoded_byte = base64_decode(str_encoded);
  Mat mat = imdecode(str_decoded_byte, CV_LOAD_IMAGE_COLOR);

  // 开始人脸检测算法
  dlib::array2d<bgr_pixel> img;  
  dlib::assign_image(img, dlib::cv_image<bgr_pixel>(mat));

  timeval start, end;
  gettimeofday(&start, NULL);
  std::vector<dlib::rectangle> dets = detector(img);
  gettimeofday(&end, NULL);

  std::string detect_result = "";
  for (int i = 0; i < dets.size(); i++)
  {
    if (!detect_result.empty()) detect_result += " ";

    char ptr_result[30];
    sprintf(ptr_result, "%d %d %d %d", (int)dets[i].left(), (int)dets[i].top(), (int)dets[i].right(), (int)dets[i].bottom());
    string str_result(ptr_result);
    detect_result += str_result;
  }

  printf("Counter: %3d, BBOX: %s, Time of Detect: %f\n", counter, 
                                                         detect_result.empty() ? "Null" : detect_result.c_str(), 
                                                         (double)((end.tv_sec - start.tv_sec)*1000.0 + (end.tv_usec - start.tv_usec)/1000.0));
  mg_printf(conn, "%s", detect_result.c_str());
  
  return 0;
}

五、后记

完成上述修改后，运行程序，发现程序运行速度巨慢。经过简单的测试就能发现，dlib人脸检测算法占用了大量的时间（0.6s左右），看一下gpu发现并没有调用，看起来dlib在cpu下运行速度大概就是这样了，而且我还不知道怎么配置dlib支持gpu，豹怒。。

不过程序还是达成了预定的目标，只要将dlib替换成任何需要进行实时检测的其它算法的接口，就可以利用远程服务器来跑算法了。缺点是只有C++接口，遇到python脚本还是没办法。

以后会考虑解决python接口的问题。