GOTURN(Generic Object Tracking Using Regression Networks ECCV2016)

23 篇文章 0 订阅
1 篇文章 0 订阅

This tracker is able to track objects at 100 fps. This real-time speed is due to two factors.
First, most previous neural network trackers are trained online. However, training neural networks is a slow process, leading to slow tracking. In contrast, our tracker is trained offline to learn a generic relationship between appearance and motion, so no online training is required.
Second, most trackers take a classification-based approah, classifying many image patches to find the target object. In contrast, this tracker used a regression-based approach, requiring just a single feed-forward pass through the network to regresses directly to the location of the target object.
The combination of offline training and one-pass regression leads to a significant speed-up compared to previous approaches and allows it to track objects at real-time speeds.

下面这个是train使用的网络,跟deploy有点不一样哦。
这里写图片描述

这里写图片描述

自己根据这个模型和网络实现的test过程,使用的是vot2014的数据库进行的测试,在测试过程中,在序列检测的过程中使用除第一帧(使用gounding truth)外,其他都是使用前一帧的结果进行target的获取,这使用过程中,发现,GOTURN回出现当相似的物体相遇之后,跟踪有时候回出现跳转到另外一个物体(分析:可以是在序列过程中误差的叠加造成forward时候的target一些偏差导致跟踪错误),还有,对于可形变物体跟踪效果也不好。(使用的时候要注意,输出的结果要除以10,才是得到的是左上角,和右下角相对于patch的位置)。

#include <string>
#include <vector>

#include "boost/algorithm/string.hpp"
#include "google/protobuf/text_format.h"
#include <opencv2/opencv.hpp>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/common.hpp"
#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"
#include "caffe/util/io.hpp"
#include <stdio.h>
#include <malloc.h>
#include <fstream>
#include <boost/progress.hpp>
#include <fstream>
#include <stdio.h>


#include <boost/math/special_functions/next.hpp>
#include <boost/random.hpp>

#include <limits>

#include "caffe/common.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"


//#include "caffe/util/math_functions.hpp"

using caffe::Blob;
using caffe::Caffe;
using caffe::Datum;
using caffe::Net;
using caffe::Layer;
using std::string;
namespace db = caffe::db;

void loaddata_target(boost::shared_ptr<Net<float> >& net, std::string image_path);
void loaddata_image(boost::shared_ptr<Net<float> >& net, std::string image_path);

void loaddata_target(boost::shared_ptr<Net<float> >& net, cv::Mat image);
void loaddata_image(boost::shared_ptr<Net<float> >& net, cv::Mat image);

//int lap(int x1_min,int x1_max,int x2_min,int x2_max);
int main(int argc, char** argv){
  Caffe::set_mode(Caffe::GPU);
  boost::shared_ptr<Net<float> > net(new Net<float>(argv[1], caffe::TEST));
  net->CopyTrainedLayersFromBinaryProto(argv[2]);

  //argv[3]表示的是包含pictures的文件夹
  cv::Mat target, image;
  cv::Mat pre_image, cur_image;
  cv::Point point1, point2;
  float coord;
  char* filepath = new char[100];
  std::ifstream in;//;(string(string(argv[3])));
  sprintf(filepath, "%s/groundtruth.txt",argv[3]);
  in.open(filepath);
  delete [] filepath;
  in >> coord;
  in >> coord;
  in >> coord;
  point1.x = int(coord);
  in >> coord;
  point1.y = int(coord);
  in >> coord;
  in >> coord;
  in >> coord;
  point2.x = int(coord);
  in >> coord;
  point2.y = int(coord);
  in.close();

  char* image_path = new char[100];
  int count = 1;
  int cols,rows;
  int w,h;
  while(1){
      //std::cout << point1 << " " << point2 << std::endl;
      sprintf(image_path,"%s/%.8d.jpg",argv[3],count++);
      pre_image = cv::imread(image_path);
      //std::cout << image_path << std::endl;

      sprintf(image_path,"%s/%.8d.jpg",argv[3],count++);
      cur_image = cv::imread(image_path);

      cols = cur_image.cols;
      rows = cur_image.rows;

      if(cols == 0 || rows == 0) break;

      w = point2.x - point1.x;
      h = point2.y - point1.y;

      //std::cout << w <<" h=" << h << std::endl;

      point1.x = point1.x - w/2;
      point1.y = point1.y - h/2;
      point2.x = point2.x + w/2;
      point2.y = point2.y + h/2;

      //std::cout << point1 << " " << point2 << std::endl;

      point1.x = point1.x < 0 ? 0 : point1.x;
      point1.y = point1.y < 0 ? 0 : point1.y;
      point2.x = point2.x > cols ? cols : point2.x;
      point2.y = point2.y > rows ? rows : point2.y;

     // std::cout << point1 << " " << point2 << std::endl;

      target = pre_image(cv::Rect(point1,point2));
      image  = cur_image(cv::Rect(point1,point2));

      loaddata_target(net,target);
      loaddata_image(net,image);
      net->Forward();

      Blob<float>* output_layer = net->output_blobs()[0];
      std::vector<int> points;
      points.push_back(int(image.cols*((output_layer->cpu_data()[0]/10))));
      points.push_back(int(image.rows*((output_layer->cpu_data()[1]/10))));
      points.push_back(int(image.cols*((output_layer->cpu_data()[2]/10))));
      points.push_back(int(image.rows*((output_layer->cpu_data()[3]/10))));

      point2.x = point1.x + points[2];
      point2.y = point1.y+ points[3];
      point1.x  += points[0];
      point1.y  += points[1];

      cv::rectangle(cur_image, cv::Rect(point1,point2),cv::Scalar(0,225,255),1);

      cv::imshow("target",target);

      cv::imshow("image",cur_image);
      if(count == 3)
          cv::waitKey(0);
      cv::waitKey(33);


      //std::cout << image_path << std::endl;
      //target = pre_image();
     // break;
  }
  delete [] image_path;
  return 1;
}

void loaddata_target(boost::shared_ptr<Net<float> >& net, std::string image_path){
  Blob<float>* input_layer = net->input_blobs()[0];
  int width, height;
  width = input_layer->width();
  height = input_layer->height();
  int size = width*height;
  cv::Mat image = cv::imread(image_path,-1);
  cv::Mat image_resized;
  cv::resize(image, image_resized, cv::Size(height, width));
  float* input_data = input_layer->mutable_cpu_data();
  int temp,idx;
  for(int i = 0; i < height; ++i){
    uchar* pdata = image_resized.ptr<uchar>(i);
    for(int j = 0; j < width; ++j){
      temp = 3*j;
      idx = i*width+j;
      input_data[idx] = (pdata[temp+2]);
      input_data[idx+size] = (pdata[temp+1]);
      input_data[idx+2*size] = (pdata[temp+0]);
    }
  }
  //cv::imshow("image",image_resized);
}

void loaddata_image(boost::shared_ptr<Net<float> >& net, std::string image_path){
  Blob<float>* input_layer = net->input_blobs()[1];
  int width, height;
  width = input_layer->width();
  height = input_layer->height();
  int size = width*height;
  cv::Mat image = cv::imread(image_path,-1);
  cv::Mat image_resized;
  cv::resize(image, image_resized, cv::Size(height, width));
  float* input_data = input_layer->mutable_cpu_data();
  int temp,idx;
  for(int i = 0; i < height; ++i){
    uchar* pdata = image_resized.ptr<uchar>(i);
    for(int j = 0; j < width; ++j){
      temp = 3*j;
      idx = i*width+j;
      input_data[idx] = (pdata[temp+2]);
      input_data[idx+size] = (pdata[temp+1]);
      input_data[idx+2*size] = (pdata[temp+0]);
    }
  }
  //cv::waitKey(0);
  //cv::imshow("image",image_resized);
}

void loaddata_target(boost::shared_ptr<Net<float> >& net, cv::Mat image){
  Blob<float>* input_layer = net->input_blobs()[0];
  int width, height;
  width = input_layer->width();
  height = input_layer->height();
  int size = width*height;
  //cv::Mat image = cv::imread(image_path,-1);
  cv::Mat image_resized;
  cv::resize(image, image_resized, cv::Size(height, width));
  float* input_data = input_layer->mutable_cpu_data();
  int temp,idx;
  for(int i = 0; i < height; ++i){
    uchar* pdata = image_resized.ptr<uchar>(i);
    for(int j = 0; j < width; ++j){
      temp = 3*j;
      idx = i*width+j;
      input_data[idx] = (pdata[temp+2])-104;
      input_data[idx+size] = (pdata[temp+1])-117;
      input_data[idx+2*size] = (pdata[temp+0])-123;
    }
  }
  //cv::imshow("image",image_resized);
}

void loaddata_image(boost::shared_ptr<Net<float> >& net, cv::Mat image){
  Blob<float>* input_layer = net->input_blobs()[1];
  int width, height;
  width = input_layer->width();
  height = input_layer->height();
  int size = width*height;
  //cv::Mat image = cv::imread(image_path,-1);
  cv::Mat image_resized;
  cv::resize(image, image_resized, cv::Size(height, width));
  float* input_data = input_layer->mutable_cpu_data();
  int temp,idx;
  for(int i = 0; i < height; ++i){
    uchar* pdata = image_resized.ptr<uchar>(i);
    for(int j = 0; j < width; ++j){
      temp = 3*j;
      idx = i*width+j;
      input_data[idx] = (pdata[temp+2])-104;
      input_data[idx+size] = (pdata[temp+1])-117;
      input_data[idx+2*size] = (pdata[temp+0])-123;
    }
  }
  //cv::waitKey(0);
  //cv::imshow("image",image_resized);
}

deploy.prototxt

name: "CaffeNet"

input: "target"
input: "image"

#target
input_dim: 1
input_dim: 3
input_dim: 227
input_dim: 227

#image
input_dim: 1
input_dim: 3
input_dim: 227
input_dim: 227

layer {
  name: "conv1" type: "Convolution" bottom: "target" top: "conv1" param { lr_mult: 0 decay_mult: 1 }
  param {
    lr_mult: 0 decay_mult: 0 }
  convolution_param {
    num_output: 96 kernel_size: 11 stride: 4 weight_filler { type: "gaussian" std: 0.01 }
    bias_filler {
      type: "constant" value: 0 }
  }
}
layer {
  name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" }
layer {
  name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm1" type: "LRN" bottom: "pool1" top: "norm1" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
layer {
  name: "conv2" type: "Convolution" bottom: "norm1" top: "conv2" param { lr_mult: 0 decay_mult: 1 }
  param {
    lr_mult: 0 decay_mult: 0 }
  convolution_param {
    num_output: 256 pad: 2 kernel_size: 5 group: 2 weight_filler { type: "gaussian" std: 0.01 }
    bias_filler {
      type: "constant" value: 1 }
  }
}
layer {
  name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" }
layer {
  name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm2" type: "LRN" bottom: "pool2" top: "norm2" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
layer {
  name: "conv3" type: "Convolution" bottom: "norm2" top: "conv3" param { lr_mult: 0 decay_mult: 1 }
  param {
    lr_mult: 0 decay_mult: 0 }
  convolution_param {
    num_output: 384 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 }
    bias_filler {
      type: "constant" value: 0 }
  }
}
layer {
  name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" }
layer {
  name: "conv4" type: "Convolution" bottom: "conv3" top: "conv4" param { lr_mult: 0 decay_mult: 1 }
  param {
    lr_mult: 0 decay_mult: 0 }
  convolution_param {
    num_output: 384 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 }
    bias_filler {
      type: "constant" value: 1 }
  }
}
layer {
  name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4" }
layer {
  name: "conv5" type: "Convolution" bottom: "conv4" top: "conv5" param { lr_mult: 0 decay_mult: 1 }
  param {
    lr_mult: 0 decay_mult: 0 }
  convolution_param {
    num_output: 256 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 }
    bias_filler {
      type: "constant" value: 1 }
  }
}
layer {
  name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" }
layer {
  name: "pool5" type: "Pooling" bottom: "conv5" top: "pool5" pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}

layer {
  name: "conv1_p" type: "Convolution" bottom: "image" top: "conv1_p" param { lr_mult: 0 decay_mult: 1 }
  param {
    lr_mult: 0 decay_mult: 0 }
  convolution_param {
    num_output: 96 kernel_size: 11 stride: 4 weight_filler { type: "gaussian" std: 0.01 }
    bias_filler {
      type: "constant" value: 0 }
  }
}
layer {
  name: "relu1_p" type: "ReLU" bottom: "conv1_p" top: "conv1_p" }
layer {
  name: "pool1_p" type: "Pooling" bottom: "conv1_p" top: "pool1_p" pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm1_p" type: "LRN" bottom: "pool1_p" top: "norm1_p" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
layer {
  name: "conv2_p" type: "Convolution" bottom: "norm1_p" top: "conv2_p" param { lr_mult: 0 decay_mult: 1 }
  param {
    lr_mult: 0 decay_mult: 0 }
  convolution_param {
    num_output: 256 pad: 2 kernel_size: 5 group: 2 weight_filler { type: "gaussian" std: 0.01 }
    bias_filler {
      type: "constant" value: 1 }
  }
}
layer {
  name: "relu2_p" type: "ReLU" bottom: "conv2_p" top: "conv2_p" }
layer {
  name: "pool2_p" type: "Pooling" bottom: "conv2_p" top: "pool2_p" pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm2_p" type: "LRN" bottom: "pool2_p" top: "norm2_p" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
layer {
  name: "conv3_p" type: "Convolution" bottom: "norm2_p" top: "conv3_p" param { lr_mult: 0 decay_mult: 1 }
  param {
    lr_mult: 0 decay_mult: 0 }
  convolution_param {
    num_output: 384 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 }
    bias_filler {
      type: "constant" value: 0 }
  }
}
layer {
  name: "relu3_p" type: "ReLU" bottom: "conv3_p" top: "conv3_p" }
layer {
  name: "conv4_p" type: "Convolution" bottom: "conv3_p" top: "conv4_p" param { lr_mult: 0 decay_mult: 1 }
  param {
    lr_mult: 0 decay_mult: 0 }
  convolution_param {
    num_output: 384 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 }
    bias_filler {
      type: "constant" value: 1 }
  }
}
layer {
  name: "relu4_p" type: "ReLU" bottom: "conv4_p" top: "conv4_p" }
layer {
  name: "conv5_p" type: "Convolution" bottom: "conv4_p" top: "conv5_p" param { lr_mult: 0 decay_mult: 1 }
  param {
    lr_mult: 0 decay_mult: 0 }
  convolution_param {
    num_output: 256 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 }
    bias_filler {
      type: "constant" value: 1 }
  }
}
layer {
  name: "relu5_p" type: "ReLU" bottom: "conv5_p" top: "conv5_p" }
layer {
  name: "pool5_p" type: "Pooling" bottom: "conv5_p" top: "pool5_p" pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}

layer {
  name: "concat" type: "Concat" bottom: "pool5" bottom: "pool5_p" top: "pool5_concat" concat_param { axis: 1 }
}

layer {
  name: "fc6-new" type: "InnerProduct" bottom: "pool5_concat" top: "fc6" param { lr_mult: 10 decay_mult: 1 }
  param {
    lr_mult: 20 decay_mult: 0 }
  inner_product_param {
    num_output: 4096 weight_filler { type: "gaussian" std: 0.005 }
    bias_filler {
      type: "constant" value: 1 }
  }
}
layer {
  name: "relu6" type: "ReLU" bottom: "fc6" top: "fc6" }
layer {
  name: "drop6" type: "Dropout" bottom: "fc6" top: "fc6" dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "fc7-new" type: "InnerProduct" bottom: "fc6" top: "fc7" param { lr_mult: 10 decay_mult: 1 }
  param {
    lr_mult: 20 decay_mult: 0 }
  inner_product_param {
    num_output: 4096 weight_filler { type: "gaussian" std: 0.005 }
    bias_filler {
      type: "constant" value: 1 }
  }
}
layer {
  name: "relu7" type: "ReLU" bottom: "fc7" top: "fc7" }
layer {
  name: "drop7" type: "Dropout" bottom: "fc7" top: "fc7" dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "fc7-newb" type: "InnerProduct" bottom: "fc7" top: "fc7b" param { lr_mult: 10 decay_mult: 1 }
  param {
    lr_mult: 20 decay_mult: 0 }
  inner_product_param {
    num_output: 4096 weight_filler { type: "gaussian" std: 0.005 }
    bias_filler {
      type: "constant" value: 1 }
  }
}
layer {
  name: "relu7b" type: "ReLU" bottom: "fc7b" top: "fc7b" }
layer {
  name: "drop7b" type: "Dropout" bottom: "fc7b" top: "fc7b" dropout_param { dropout_ratio: 0.5 }
}


layer {
  name: "fc8-shapes" type: "InnerProduct" bottom: "fc7b" top: "fc8" param { lr_mult: 10 decay_mult: 1 }
  param {
    lr_mult: 20 decay_mult: 0 }
  inner_product_param {
    num_output: 4 weight_filler { type: "gaussian" std: 0.01 }
    bias_filler {
      type: "constant" value: 0 }
  }
}
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值