Action Recognition－iDT代码解析

冬后晚晴

于 2018-03-13 10:59:25 发布

阅读量823

点赞数 1

分类专栏： Action Recognition

本文链接：https://blog.csdn.net/weixin_37970694/article/details/79537651

版权

Action Recognition 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

主要参考博文：《行为识别笔记：iDT算法用法与代码解析》以及《Improved Dense Trajectory用法及源码分析》。
上一篇博文《iDT论文笔记》已经从原理上对iDT算法进行了介绍，本文将重点介绍其代码的使用及解析。源代码可从官网下载（链接：iDT代码）。

代码结构

iDT代码中主要包含以下几个代码文件：

DenseTrackStab.cpp:iDT算法主程序
DenseTrackStab.h:轨迹跟踪的一些参数，以及一些数据结构体的定义
Descriptors.h:特征相关的各种函数
Initialize.h:初始化相关的各种函数
OpticalFlow.h:光流相关的各种函数
Video.cpp:这个程序与iDT算法无关，只是作者提供用来测试两个依赖库opencv和ffmpeg是否安装成功的测试程序。

代码解析

iDT算法主程序代码的大致思路：

1. 读入新的一帧
2. 通过SURF特征和光流计算当前帧和上一帧的投影变换矩阵
3. 使用求得的投影变换矩阵对当前帧进行warp，消除相机运动影响
4. 利用warp后的当前帧图像和上一帧图像计算光流
5. 在各个图像尺度上跟踪轨迹并计算特征
6. 保存当前帧的相关信息，跳回第1步

DenseTrackStab.cpp

#include "DenseTrackStab.h"
#include "Initialize.h"
#include "Descriptors.h"
#include "OpticalFlow.h"

#include <time.h>

using namespace cv;

int show_track = 0; // set show_track = 1, if you want to visualize the trajectories

// Extracts improved dense trajectories (iDT) from the video named by argv[1] and prints one
// tab-separated line per accepted trajectory to stdout.
//
// For every frame inside [start_frame, end_frame] the loop:
//   1. reads the frame and converts it to grey,
//   2. collects point matches between the previous and the current frame, both from SURF
//      descriptors and from dense optical flow, and fits a homography to them with RANSAC,
//   3. warps the current frame with that homography to cancel the camera motion,
//   4. computes optical flow between the previous frame and the warped frame,
//   5. at every scale, advances the tracked points and samples HOG/HOF/MBH descriptors,
//   6. stores the current frame's data as "previous" for the next iteration.
//
// Returns 0 on normal termination and -1 when no video was given or it cannot be opened.
int main(int argc, char** argv)
{
    VideoCapture capture;
    // The video path is picked up before the options are parsed, as in the original code.
    // NOTE(review): arg_parse is defined elsewhere; presumably it may reorder argv — confirm.
    char* video = (argc > 1) ? argv[1] : NULL;
    int flag = arg_parse(argc, argv);
    if(video == NULL) {
        // without this guard a missing argument would reach capture.open() as a null pointer
        fprintf(stderr, "No input video given..\n");
        return -1;
    }
    capture.open(video);

    if(!capture.isOpened()) {
        fprintf(stderr, "Could not initialize capturing..\n");
        return -1;
    }

    int frame_num = 0;
    TrackInfo trackInfo;
    DescInfo hogInfo, hofInfo, mbhInfo;

    // HOG and MBH use 8 bins; HOF is flagged as HOF and gets 9 bins (BuildDescMat reserves
    // the extra one for below-threshold flow).
    InitTrackInfo(&trackInfo, track_length, init_gap);
    InitDescInfo(&hogInfo, 8, false, patch_size, nxy_cell, nt_cell);
    InitDescInfo(&hofInfo, 9, true, patch_size, nxy_cell, nt_cell);
    InitDescInfo(&mbhInfo, 8, false, patch_size, nxy_cell, nt_cell);

    SeqInfo seqInfo;
    InitSeqInfo(&seqInfo, video);

    // Optional per-frame bounding boxes; one entry per video frame is expected.
    std::vector<Frame> bb_list;
    if(bb_file) {
        LoadBoundBox(bb_file, bb_list);
        assert(bb_list.size() == seqInfo.length);
    }

    // A non-zero flag from arg_parse makes the sequence length follow the requested frame range.
    if(flag)
        seqInfo.length = end_frame - start_frame + 1;

//  fprintf(stderr, "video size, length: %d, width: %d, height: %d\n", seqInfo.length, seqInfo.width, seqInfo.height);

    if(show_track == 1)
        namedWindow("DenseTrackStab", 0);

    SurfFeatureDetector detector_surf(200);
    SurfDescriptorExtractor extractor_surf(true, true);

    // matched point pairs (previous frame -> current frame) from flow, from SURF, and merged
    std::vector<Point2f> prev_pts_flow, pts_flow;
    std::vector<Point2f> prev_pts_surf, pts_surf;
    std::vector<Point2f> prev_pts_all, pts_all;

    std::vector<KeyPoint> prev_kpts_surf, kpts_surf;
    Mat prev_desc_surf, desc_surf;
    Mat flow, human_mask;

    Mat image, prev_grey, grey;

    std::vector<float> fscales(0);
    std::vector<Size> sizes(0);

    // per-scale pyramids: grey images, flow fields and polynomial expansions
    std::vector<Mat> prev_grey_pyr(0), grey_pyr(0), flow_pyr(0), flow_warp_pyr(0);
    std::vector<Mat> prev_poly_pyr(0), poly_pyr(0), poly_warp_pyr(0);

    std::vector<std::list<Track> > xyScaleTracks;
    int init_counter = 0; // indicate when to detect new feature points
    while(true) {
        Mat frame;
        int i, j, c;

        // get a new frame
        capture >> frame;
        if(frame.empty())
            break;

        // frames outside the requested range are only counted
        if(frame_num < start_frame || frame_num > end_frame) {
            frame_num++;
            continue;
        }

        // The first processed frame only initialises buffers, the first set of tracked
        // points and the "previous frame" data; no trajectory is advanced yet.
        if(frame_num == start_frame) {
            image.create(frame.size(), CV_8UC3);
            grey.create(frame.size(), CV_8UC1);
            prev_grey.create(frame.size(), CV_8UC1);

            InitPry(frame, fscales, sizes);

            BuildPry(sizes, CV_8UC1, prev_grey_pyr);
            BuildPry(sizes, CV_8UC1, grey_pyr);
            BuildPry(sizes, CV_32FC2, flow_pyr);
            BuildPry(sizes, CV_32FC2, flow_warp_pyr);

            BuildPry(sizes, CV_32FC(5), prev_poly_pyr);
            BuildPry(sizes, CV_32FC(5), poly_pyr);
            BuildPry(sizes, CV_32FC(5), poly_warp_pyr);

            xyScaleTracks.resize(scale_num);

            frame.copyTo(image);
            cvtColor(image, prev_grey, CV_BGR2GRAY);

            for(int iScale = 0; iScale < scale_num; iScale++) {
                // every pyramid level is resized from the level above it
                if(iScale == 0)
                    prev_grey.copyTo(prev_grey_pyr[0]);
                else
                    resize(prev_grey_pyr[iScale-1], prev_grey_pyr[iScale], prev_grey_pyr[iScale].size(), 0, 0, INTER_LINEAR);

                // dense sampling feature points
                std::vector<Point2f> points(0);
                DenseSample(prev_grey_pyr[iScale], points, quality, min_distance);

                // save the feature points
                std::list<Track>& tracks = xyScaleTracks[iScale];
                for(i = 0; i < points.size(); i++)
                    tracks.push_back(Track(points[i], trackInfo, hogInfo, hofInfo, mbhInfo));
            }

            // compute polynomial expansion
            my::FarnebackPolyExpPyr(prev_grey, prev_poly_pyr, fscales, 7, 1.5);

            // the mask is all ones unless bounding boxes are given, which are zeroed out
            human_mask = Mat::ones(frame.size(), CV_8UC1);
            if(bb_file)
                InitMaskWithBox(human_mask, bb_list[frame_num].BBs);

            detector_surf.detect(prev_grey, prev_kpts_surf, human_mask);
            extractor_surf.compute(prev_grey, prev_kpts_surf, prev_desc_surf);

            frame_num++;
            continue;
        }

        init_counter++;
        frame.copyTo(image);
        cvtColor(image, grey, CV_BGR2GRAY);

        // match surf features
        if(bb_file)
            InitMaskWithBox(human_mask, bb_list[frame_num].BBs);
        detector_surf.detect(grey, kpts_surf, human_mask);
        extractor_surf.compute(grey, kpts_surf, desc_surf);
        ComputeMatch(prev_kpts_surf, kpts_surf, prev_desc_surf, desc_surf, prev_pts_surf, pts_surf);

        // compute optical flow for all scales once
        my::FarnebackPolyExpPyr(grey, poly_pyr, fscales, 7, 1.5);
        my::calcOpticalFlowFarneback(prev_poly_pyr, poly_pyr, flow_pyr, 10, 2);

        // matches derived from the full-resolution flow are merged with the SURF matches
        MatchFromFlow(prev_grey, flow_pyr[0], prev_pts_flow, pts_flow, human_mask);
        MergeMatch(prev_pts_flow, pts_flow, prev_pts_surf, pts_surf, prev_pts_all, pts_all);

        // The homography stays the identity unless there are more than 50 matches and
        // RANSAC keeps more than 25 of them as inliers.
        Mat H = Mat::eye(3, 3, CV_64FC1);
        if(pts_all.size() > 50) {
            std::vector<unsigned char> match_mask;
            Mat temp = findHomography(prev_pts_all, pts_all, RANSAC, 1, match_mask);
            if(countNonZero(Mat(match_mask)) > 25)
                H = temp;
        }

        Mat H_inv = H.inv();
        Mat grey_warp = Mat::zeros(grey.size(), CV_8UC1);
        MyWarpPerspective(prev_grey, grey, grey_warp, H_inv); // warp the second frame

        // compute optical flow for all scales once
        my::FarnebackPolyExpPyr(grey_warp, poly_warp_pyr, fscales, 7, 1.5);
        my::calcOpticalFlowFarneback(prev_poly_pyr, poly_warp_pyr, flow_warp_pyr, 10, 2);

        for(int iScale = 0; iScale < scale_num; iScale++) {
            if(iScale == 0)
                grey.copyTo(grey_pyr[0]);
            else
                resize(grey_pyr[iScale-1], grey_pyr[iScale], grey_pyr[iScale].size(), 0, 0, INTER_LINEAR);

            int width = grey_pyr[iScale].cols;
            int height = grey_pyr[iScale].rows;

            // compute the integral histograms
            // (HOG from the previous grey image, HOF and MBH from the warped flow)
            DescMat* hogMat = InitDescMat(height+1, width+1, hogInfo.nBins);
            HogComp(prev_grey_pyr[iScale], hogMat->desc, hogInfo);

            DescMat* hofMat = InitDescMat(height+1, width+1, hofInfo.nBins);
            HofComp(flow_warp_pyr[iScale], hofMat->desc, hofInfo);

            DescMat* mbhMatX = InitDescMat(height+1, width+1, mbhInfo.nBins);
            DescMat* mbhMatY = InitDescMat(height+1, width+1, mbhInfo.nBins);
            MbhComp(flow_warp_pyr[iScale], mbhMatX->desc, mbhMatY->desc, mbhInfo);

            // track feature points in each scale separately
            std::list<Track>& tracks = xyScaleTracks[iScale];
            for (std::list<Track>::iterator iTrack = tracks.begin(); iTrack != tracks.end();) {
                int index = iTrack->index;
                Point2f prev_point = iTrack->point[index];
                int x = std::min<int>(std::max<int>(cvRound(prev_point.x), 0), width-1);
                int y = std::min<int>(std::max<int>(cvRound(prev_point.y), 0), height-1);

                // the point itself is advanced with the un-warped flow
                Point2f point;
                point.x = prev_point.x + flow_pyr[iScale].ptr<float>(y)[2*x];
                point.y = prev_point.y + flow_pyr[iScale].ptr<float>(y)[2*x+1];

                // drop the track as soon as it leaves the image
                if(point.x <= 0 || point.x >= width || point.y <= 0 || point.y >= height) {
                    iTrack = tracks.erase(iTrack);
                    continue;
                }

                // the stored displacement comes from the camera-motion-compensated flow
                iTrack->disp[index].x = flow_warp_pyr[iScale].ptr<float>(y)[2*x];
                iTrack->disp[index].y = flow_warp_pyr[iScale].ptr<float>(y)[2*x+1];

                // get the descriptors for the feature point
                RectInfo rect;
                GetRect(prev_point, rect, width, height, hogInfo);
                GetDesc(hogMat, rect, hogInfo, iTrack->hog, index);
                GetDesc(hofMat, rect, hofInfo, iTrack->hof, index);
                GetDesc(mbhMatX, rect, mbhInfo, iTrack->mbhX, index);
                GetDesc(mbhMatY, rect, mbhInfo, iTrack->mbhY, index);
                iTrack->addPoint(point);

                // draw the trajectories at the first scale
                if(show_track == 1 && iScale == 0)
                    DrawTrack(iTrack->point, iTrack->index, fscales[iScale], image);

                // if the trajectory achieves the maximal length
                if(iTrack->index >= trackInfo.length) {
                    // positions and displacements are scaled back to full-resolution coordinates
                    std::vector<Point2f> trajectory(trackInfo.length+1);
                    for(int i = 0; i <= trackInfo.length; ++i)
                        trajectory[i] = iTrack->point[i]*fscales[iScale];

                    std::vector<Point2f> displacement(trackInfo.length);
                    for (int i = 0; i < trackInfo.length; ++i)
                        displacement[i] = iTrack->disp[i]*fscales[iScale];

                    // IsCameraMotion returns true for trajectories that should be kept
                    float mean_x(0), mean_y(0), var_x(0), var_y(0), length(0);
                    if(IsValid(trajectory, mean_x, mean_y, var_x, var_y, length) && IsCameraMotion(displacement)) {
                        // output the trajectory
                        printf("%d\t%f\t%f\t%f\t%f\t%f\t%f\t", frame_num, mean_x, mean_y, var_x, var_y, length, fscales[iScale]);

                        // for spatio-temporal pyramid
                        // (mean position and mid-trajectory time, each clamped to [0, 0.999])
                        printf("%f\t", std::min<float>(std::max<float>(mean_x/float(seqInfo.width), 0), 0.999));
                        printf("%f\t", std::min<float>(std::max<float>(mean_y/float(seqInfo.height), 0), 0.999));
                        printf("%f\t", std::min<float>(std::max<float>((frame_num - trackInfo.length/2.0 - start_frame)/float(seqInfo.length), 0), 0.999));

                        // output the trajectory
                        // (the displacements normalised by IsCameraMotion, not the raw positions)
                        for (int i = 0; i < trackInfo.length; ++i)
                            printf("%f\t%f\t", displacement[i].x, displacement[i].y);

                        PrintDesc(iTrack->hog, hogInfo, trackInfo);
                        PrintDesc(iTrack->hof, hofInfo, trackInfo);
                        PrintDesc(iTrack->mbhX, mbhInfo, trackInfo);
                        PrintDesc(iTrack->mbhY, mbhInfo, trackInfo);
                        printf("\n");
                    }

                    // a full-length track is removed whether or not it was printed
                    iTrack = tracks.erase(iTrack);
                    continue;
                }
                ++iTrack;
            }
            ReleDescMat(hogMat);
            ReleDescMat(hofMat);
            ReleDescMat(mbhMatX);
            ReleDescMat(mbhMatY);

            if(init_counter != trackInfo.gap)
                continue;

            // detect new feature points every gap frames
            // (the live track heads are passed in so that occupied cells are skipped)
            std::vector<Point2f> points(0);
            for(std::list<Track>::iterator iTrack = tracks.begin(); iTrack != tracks.end(); iTrack++)
                points.push_back(iTrack->point[iTrack->index]);

            DenseSample(grey_pyr[iScale], points, quality, min_distance);
            // save the new feature points
            for(i = 0; i < points.size(); i++)
                tracks.push_back(Track(points[i], trackInfo, hogInfo, hofInfo, mbhInfo));
        }

        // Restart the countdown only once the gap has been reached. Resetting it on every
        // frame would keep the counter at 1, so a gap larger than 1 would never re-sample.
        if(init_counter == trackInfo.gap)
            init_counter = 0;

        // the current frame becomes the previous frame of the next iteration
        grey.copyTo(prev_grey);
        for(i = 0; i < scale_num; i++) {
            grey_pyr[i].copyTo(prev_grey_pyr[i]);
            poly_pyr[i].copyTo(prev_poly_pyr[i]);
        }

        prev_kpts_surf = kpts_surf;
        desc_surf.copyTo(prev_desc_surf);

        frame_num++;

        if( show_track == 1 ) {
            imshow( "DenseTrackStab", image);
            c = cvWaitKey(3);
            // key code 27 (Esc) stops the processing
            if((char)c == 27) break;
        }
    }

    if( show_track == 1 )
        destroyWindow("DenseTrackStab");

    return 0;
}

DenseTrackStab.h

#ifndef DENSETRACKSTAB_H_
#define DENSETRACKSTAB_H_

#include <opencv/cv.h>
#include <opencv/highgui.h>
#include <opencv/cxcore.h>
#include <ctype.h>
#include <unistd.h>
#include <algorithm>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <fstream>
#include <iostream>
#include <vector>
#include <list>
#include <string>

#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/features2d/features2d.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/nonfree/nonfree.hpp"

using namespace cv;

// Frame range handled by main: frames with an index below start_frame or above end_frame
// are skipped.
int start_frame = 0;
int end_frame = INT_MAX;
// Upper bound for the number of spatial scales; InitPry lowers it to what the frame size allows.
int scale_num = 8;
// Factor between two consecutive scales (see InitPry).
const float scale_stride = sqrt(2);
// Path of the optional bounding-box file; main only loads boxes when this is not NULL.
char* bb_file = NULL;

// parameters for the descriptors
// spatial size of the space-time volume: 32*32
int patch_size = 32;
// number of spatial cells: 2*2
int nxy_cell = 2;
// number of temporal cells: 3
int nt_cell = 3;
// added to every histogram bin in GetDesc before normalisation
float epsilon = 0.05;
// flow magnitudes up to this value go into the extra HOF bin (see BuildDescMat)
const float min_flow = 0.4;

// parameters for tracking
// quality and min_distance are passed to DenseSample
double quality = 0.001;
int min_distance = 5;
// gap and trajectory length handed to InitTrackInfo
int init_gap = 1;
// maximal number of frames a point is tracked: 15
int track_length = 15;

// parameters for rejecting trajectory
const float min_var = sqrt(3);
const float max_var = 50;
const float max_dis = 20;
// Rectangle around a feature point, filled in by GetRect and consumed by GetDesc.
struct RectInfo {
    int x;       // left edge (top left corner)
    int y;       // top edge (top left corner)
    int width;   // horizontal extent
    int height;  // vertical extent
};
// Basic properties of the input video.
struct SeqInfo {
    int width;   // horizontal resolution of the video
    int height;  // vertical resolution of the video
    int length;  // number of frames
};

// Settings shared by all trajectories.
struct TrackInfo {
    int length;  // length of the trajectory
    int gap;     // initialization gap for feature re-sampling
};

// Layout of one descriptor type (HOG, HOF or MBH).
// Fix: the ntCells line used a full-width (U+3000) space before its comment, which is not
// valid whitespace in C++ source; it is replaced by an ordinary space.
typedef struct {
    int nBins;   // number of histogram bins
    bool isHof;  // whether this is the HOF descriptor
    int nxCells; // number of cells in x direction
    int nyCells; // number of cells in y direction
    int ntCells; // number of cells in temporal direction
    int dim;     // dimension of the descriptor
    int height;  // size of the block for computing the descriptor
    int width;
}DescInfo;

// Integral histogram table used to read descriptors for arbitrary rectangles.
struct DescMat {
    int height;   // number of rows of the table
    int width;    // number of columns of the table
    int nBins;    // histogram bins stored per table entry
    float* desc;  // table values, nBins floats per entry
};

class Track
{
public:
    std::vector<Point2f> point;
    std::vector<Point2f> disp;
    std::vector<float> hog;
    std::vector<float> hof;
    std::vector<float> mbhX;
    std::vector<float> mbhY;
    int index;

    Track(const Point2f& point_, const TrackInfo& trackInfo, const DescInfo& hogInfo,
          const DescInfo& hofInfo, const DescInfo& mbhInfo)
        : point(trackInfo.length+1), disp(trackInfo.length), hog(hogInfo.dim*trackInfo.length),
          hof(hofInfo.dim*trackInfo.length), mbhX(mbhInfo.dim*trackInfo.length), mbhY(mbhInfo.dim*trackInfo.length)
    {
        index = 0;
        point[0] = point_;
    }

    void addPoint(const Point2f& point_)
    {
        index++;
        point[index] = point_;
    }
};

class BoundBox
//定义BoundBox共５个参数的信息
{
public:
    Point2f TopLeft;//左上坐标(x,y)
    Point2f BottomRight;//右下坐标(x,y)
    float confidence;//置信度分数confidence

    BoundBox(float a1, float a2, float a3, float a4, float a5)
    {
        TopLeft.x = a1;
        TopLeft.y = a2;
        BottomRight.x = a3;
        BottomRight.y = a4;
        confidence = a5;
    }
};

// The bounding boxes that belong to one video frame.
class Frame
{
public:
    int frameID;                // frame identifier given at construction
    std::vector<BoundBox> BBs;  // boxes of this frame, empty after construction

    Frame(const int& frame_)
        : frameID(frame_), BBs()
    {
    }
};

#endif /*DENSETRACKSTAB_H_*/

Descriptors.h

#ifndef DESCRIPTORS_H_
#define DESCRIPTORS_H_

#include "DenseTrackStab.h"
using namespace cv;

// Places the descriptor block (descInfo.width x descInfo.height) around `point` and shifts
// it so that its top left corner lies in [0, width - descInfo.width] x [0, height - descInfo.height].
void GetRect(const Point2f& point, RectInfo& rect, const int width, const int height, const DescInfo& descInfo)
{
    const int last_x = width - descInfo.width;
    const int last_y = height - descInfo.height;

    // centre the block on the rounded point position
    int left = cvRound(point.x) - descInfo.width/2;
    int top = cvRound(point.y) - descInfo.height/2;

    // lower bound first, upper bound second
    if(left < 0) left = 0;
    if(left > last_x) left = last_x;
    if(top < 0) top = 0;
    if(top > last_y) top = last_y;

    rect.x = left;
    rect.y = top;
    rect.width = descInfo.width;
    rect.height = descInfo.height;
}

// Builds the integral orientation histogram of a 2-D vector field.
//   xComp, yComp : x and y component of the field, read as float rows
//   desc         : output table with (xComp.cols+1) entries per row and descInfo.nBins floats
//                  per entry; entry (i+1, j+1) receives the histogram sum over rows 0..i and
//                  columns 0..j
//   descInfo     : supplies the bin count and the HOF flag
// Row 0 and column 0 of the table are only read here, never written.
// NOTE(review): they are presumably zero-initialised by InitDescMat — confirm.
void BuildDescMat(const Mat& xComp, const Mat& yComp, float* desc, const DescInfo& descInfo)
{
    float maxAngle = 360.f;
    // number of values stored per table entry
    int nDims = descInfo.nBins;
    // number of orientation bins: for HOF one of the stored bins is kept aside for small flow
    int nBins = descInfo.isHof ? descInfo.nBins-1 : descInfo.nBins;
    const float angleBase = float(nBins)/maxAngle;

    // one table row, and the offset of table entry (1, 1)
    int step = (xComp.cols+1)*nDims;
    int index = step + nDims;
    // the extra index += nDims per row skips column 0 of the next table row
    for(int i = 0; i < xComp.rows; i++, index += nDims) {
        const float* xc = xComp.ptr<float>(i);
        const float* yc = yComp.ptr<float>(i);

        // summarization of the current line
        std::vector<float> sum(nDims);
        for(int j = 0; j < xComp.cols; j++) {
            float x = xc[j];
            float y = yc[j];
            float mag0 = sqrt(x*x + y*y);
            float mag1;
            int bin0, bin1;

            // HOF only: a magnitude up to min_flow counts as 1 in the extra bin
            if(descInfo.isHof && mag0 <= min_flow) {
                bin0 = nBins; // the zero bin is the last one
                mag0 = 1.0;
                bin1 = 0;
                mag1 = 0;
            }
            else {
                float angle = fastAtan2(y, x);
                if(angle >= maxAngle) angle -= maxAngle;

                // split the mag to two adjacent bins
                float fbin = angle * angleBase;
                bin0 = cvFloor(fbin);
                bin1 = (bin0+1)%nBins;

                mag1 = (fbin - bin0)*mag0;
                mag0 -= mag1;
            }

            sum[bin0] += mag0;
            sum[bin1] += mag1;

            // table entry = entry directly above + running sum of the current row
            for(int m = 0; m < nDims; m++, index++)
                desc[index] = desc[index-step] + sum[m];
        }
    }
}

// Reads one descriptor out of an integral histogram.
//   descMat  : integral histogram table
//   rect     : block to describe; it is divided into nxCells x nyCells cells
//   descInfo : descriptor layout (bins, cells, dimension)
//   desc     : output buffer; descInfo.dim values are written starting at index*descInfo.dim
//   index    : slot of the descriptor inside desc
// Each cell histogram is clamped at zero and offset by epsilon; the whole vector is divided
// by its sum and the square root of every element is stored.
void GetDesc(const DescMat* descMat, RectInfo& rect, DescInfo descInfo, std::vector<float>& desc, const int index)
{
    const int dim = descInfo.dim;
    const int bins = descInfo.nBins;
    const int table_width = descMat->width;

    const int cell_w = rect.width/descInfo.nxCells;
    const int cell_h = rect.height/descInfo.nyCells;
    // offsets from a cell's top left table entry to its right and lower neighbours
    const int right_offset = cell_w*bins;
    const int down_offset = cell_h*table_width*bins;

    // iterate over different cells, x outermost
    std::vector<float> cells(dim);
    int filled = 0;
    for(int cx = 0; cx < descInfo.nxCells; cx++) {
        const int col = rect.x + cx*cell_w;
        for(int cy = 0; cy < descInfo.nyCells; cy++) {
            const int row = rect.y + cy*cell_h;

            // the four corners of the cell in the integral histogram
            const float* tl = descMat->desc + (row*table_width + col)*bins;
            const float* tr = tl + right_offset;
            const float* bl = tl + down_offset;
            const float* br = bl + right_offset;

            for(int b = 0; b < bins; b++) {
                float sum = br[b] + tl[b] - bl[b] - tr[b];
                cells[filled++] = std::max<float>(sum, 0) + epsilon;
            }
        }
    }

    // reciprocal of the sum of all values (left untouched when the sum is not positive)
    float norm = 0;
    for(int k = 0; k < dim; k++)
        norm += cells[k];
    if(norm > 0) norm = 1./norm;

    const int base = index*dim;
    for(int k = 0; k < dim; k++)
        desc[base + k] = sqrt(cells[k]*norm);
}

// for HOG descriptor
void HogComp(const Mat& img, float* desc, DescInfo& descInfo)
{
    Mat imgX, imgY;
    Sobel(img, imgX, CV_32F, 1, 0, 1);
    Sobel(img, imgY, CV_32F, 0, 1, 1);
    BuildDescMat(imgX, imgY, desc, descInfo);
}

// for HOF descriptor
void HofComp(const Mat& flow, float* desc, DescInfo& descInfo)
{
    Mat flows[2];
    split(flow, flows);
    BuildDescMat(flows[0], flows[1], desc, descInfo);
}

// for MBH descriptor
void MbhComp(const Mat& flow, float* descX, float* descY, DescInfo& descInfo)
{
    Mat flows[2];
    split(flow, flows);

    Mat flowXdX, flowXdY, flowYdX, flowYdY;
    Sobel(flows[0], flowXdX, CV_32F, 1, 0, 1);
    Sobel(flows[0], flowXdY, CV_32F, 0, 1, 1);
    Sobel(flows[1], flowYdX, CV_32F, 1, 0, 1);
    Sobel(flows[1], flowYdY, CV_32F, 0, 1, 1);

    BuildDescMat(flowXdX, flowXdY, descX, descInfo);
    BuildDescMat(flowYdX, flowYdY, descY, descInfo);
}

// check whether a trajectory is valid or not
bool IsValid(std::vector<Point2f>& track, float& mean_x, float& mean_y, float& var_x, float& var_y, float& length)
{
    int size = track.size();
    float norm = 1./size;
    for(int i = 0; i < size; i++) {
        mean_x += track[i].x;
        mean_y += track[i].y;
    }
    mean_x *= norm;
    mean_y *= norm;

    for(int i = 0; i < size; i++) {
        float temp_x = track[i].x - mean_x;
        float temp_y = track[i].y - mean_y;
        var_x += temp_x*temp_x;
        var_y += temp_y*temp_y;
    }
    var_x *= norm;
    var_y *= norm;
    var_x = sqrt(var_x);
    var_y = sqrt(var_y);

    // remove static trajectory
    if(var_x < min_var && var_y < min_var)
        return false;
    // remove random trajectory
    if( var_x > max_var || var_y > max_var )
        return false;

    float cur_max = 0;
    for(int i = 0; i < size-1; i++) {
        track[i] = track[i+1] - track[i];
        float temp = sqrt(track[i].x*track[i].x + track[i].y*track[i].y);

        length += temp;
        if(temp > cur_max)
            cur_max = temp;
    }

    if(cur_max > max_dis && cur_max > length*0.7)
        return false;

    track.pop_back();
    norm = 1./length;
    // normalize the trajectory
    for(int i = 0; i < size-1; i++)
        track[i] *= norm;

    return true;
}

bool IsCameraMotion(std::vector<Point2f>& disp)
{
    float disp_max = 0;
    float disp_sum = 0;
    for(int i = 0; i < disp.size(); ++i) {
        float x = disp[i].x;
        float y = disp[i].y;
        float temp = sqrt(x*x + y*y);

        disp_sum += temp;
        if(disp_max < temp)
            disp_max = temp;
    }

    if(disp_max <= 1)
        return false;

    float disp_norm = 1./disp_sum;
    for (int i = 0; i < disp.size(); ++i)
        disp[i] *= disp_norm;

    return true;
}

// detect new feature points in an image without overlapping to previous points
void DenseSample(const Mat& grey, std::vector<Point2f>& points, const double quality, const int min_distance)
{
    int width = grey.cols/min_distance;
    int height = grey.rows/min_distance;

    Mat eig;
    cornerMinEigenVal(grey, eig, 3, 3);

    double maxVal = 0;
    minMaxLoc(eig, 0, &maxVal);
    const double threshold = maxVal*quality;

    std::vector<int> counters(width*height);
    int x_max = min_distance*width;
    int y_max = min_distance*height;

    for(int i = 0; i < points.size(); i++) {
        Point2f point = points[i];
        int x = cvFloor(point.x);
        int y = cvFloor(point.y);

        if(x >= x_max || y >= y_max)
            continue;
        x /= min_distance;
        y /= min_distance;
        counters[y*width+x]++;
    }

    points.clear();
    int index = 0;
    int offset = min_distance/2;
    for(int i = 0; i < height; i++)
    for(int j = 0; j < width; j++, index++) {
        if(counters[index] > 0)
            continue;

        int x = j*min_distance+offset;
        int y = i*min_distance+offset;

        if(eig.at<float>(y, x) > threshold)
            points.push_back(Point2f(float(x), float(y)));
    }
}

void InitPry(const Mat& frame, std::vector<float>& scales, std::vector<Size>& sizes)
{
    int rows = frame.rows, cols = frame.cols;
    float min_size = std::min<int>(rows, cols);

    int nlayers = 0;
    while(min_size >= patch_size) {
        min_size /= scale_stride;
        nlayers++;
    }

    if(nlayers == 0) nlayers = 1; // at least 1 scale 

    scale_num = std::min<int>(scale_num, nlayers);

    scales.resize(scale_num);
    sizes.resize(scale_num);

    scales[0] = 1.;
    sizes[0] = Size(cols, rows);

    for(int i = 1; i < scale_num; i++) {
        scales[i] = scales[i-1] * scale_stride;
        sizes[i] = Size(cvRound(cols/scales[i]), cvRound(rows/scales[i]));
    }
}

// Allocates one matrix of the given type per pyramid level, sized according to `sizes`.
void BuildPry(const std::vector<Size>& sizes, const int type, std::vector<Mat>& grey_pyr)
{
    const int level_count = sizes.size();
    grey_pyr.resize(level_count);

    int level = 0;
    while(level < level_count) {
        grey_pyr[level].create(sizes[level], type);
        level++;
    }
}

void DrawTrack(const std::vector<Point2f>& point, const int index, const float scale, Mat& image)
{
    Point2f point0 = point[0];
    point0 *= scale;

    for (int j = 1; j <= index; j++) {
        Point2f point1 = point[j];
        point1 *= scale;

        line(image, point0, point1, Scalar(0,cvFloor(255.0*(j+1.0)/float(index+1.0)),0), 2, 8, 0);
        point0 = point1;
    }
    circle(image, point0, 2, Scalar(0,0,255), -1, 8, 0);
}

// Prints a descriptor to stdout, averaged over the temporal cells.
// desc holds descInfo.dim values per tracked frame. The frames are split into
// descInfo.ntCells groups of trackInfo.length/descInfo.ntCells frames each; for every group
// the mean of each dimension is printed as "%.7f" followed by a tab.
void PrintDesc(std::vector<float>& desc, DescInfo& descInfo, TrackInfo& trackInfo)
{
    const int frames_per_cell = cvFloor(trackInfo.length/descInfo.ntCells);
    const float inv_frames = 1./float(frames_per_cell);
    const int dim = descInfo.dim;

    for(int cell = 0; cell < descInfo.ntCells; cell++) {
        // first value of this temporal cell inside desc
        const int base = cell*frames_per_cell*dim;

        std::vector<float> mean(dim);
        for(int t = 0; t < frames_per_cell; t++) {
            const int offset = base + t*dim;
            for(int j = 0; j < dim; j++)
                mean[j] += desc[offset + j];
        }

        for(int j = 0; j < dim; j++)
            printf("%.7f\t", mean[j]*inv_frames);
    }
}

// Loads the bounding boxes of all frames from a text file.
//   file    : path of the bounding-box file
//   bb_list : receives one Frame per parsed line, appended in file order
// Every line starts with an integer frame id, followed by groups of five numbers that are
// passed to the BoundBox constructor in order. Lines that do not start with an integer are
// skipped. If the number of values is not a multiple of five, a message is printed and the
// incomplete trailing group is dropped.
// A file that cannot be opened is now reported on stderr instead of being silently treated
// as empty.
void LoadBoundBox(char* file, std::vector<Frame>& bb_list)
{
    // load the bounding box file
    std::ifstream bbFile(file);
    if(!bbFile.is_open()) {
        fprintf(stderr, "Could not open bounding box file: %s\n", file);
        return;
    }

    std::string line;
    while(std::getline(bbFile, line)) {
        std::istringstream iss(line);

        int frameID;
        if (!(iss >> frameID))
            continue;

        Frame cur_frame(frameID);

        // read all remaining numbers of the line
        float temp;
        std::vector<float> a(0);
        while(iss >> temp)
            a.push_back(temp);

        int size = a.size();

        if(size % 5 != 0)
            fprintf(stderr, "Input bounding box format wrong!\n");

        // five values per box
        for(int i = 0; i < size/5; i++)
            cur_frame.BBs.push_back(BoundBox(a[i*5], a[i*5+1], a[i*5+2], a[i*5+3], a[i*5+4]));

        bb_list.push_back(cur_frame);
    }
}

// Resets mask to 1 everywhere and then writes 0 inside every bounding box.
//   mask : single-byte mask, modified in place
//   bbs  : boxes to cut out; both corners are inclusive
// Box coordinates are clamped to the mask before writing. Boxes that reach beyond the frame
// (or have negative coordinates) previously wrote outside the mask rows.
void InitMaskWithBox(Mat& mask, std::vector<BoundBox>& bbs)
{
    int width = mask.cols;
    int height = mask.rows;

    for(int i = 0; i < height; i++) {
        uchar* m = mask.ptr<uchar>(i);
        for(int j = 0; j < width; j++)
            m[j] = 1;
    }

    for(size_t k = 0; k < bbs.size(); k++) {
        const BoundBox& bb = bbs[k];

        // pixel range covered by the box, restricted to the mask
        const int row_begin = std::max<int>(cvCeil(bb.TopLeft.y), 0);
        const int row_end = std::min<int>(cvFloor(bb.BottomRight.y), height-1);
        const int col_begin = std::max<int>(cvCeil(bb.TopLeft.x), 0);
        const int col_end = std::min<int>(cvFloor(bb.BottomRight.x), width-1);

        // a box entirely outside the mask yields empty ranges and is skipped by the loops
        for(int i = row_begin; i <= row_end; i++) {
            uchar* m = mask.ptr<uchar>(i);
            for(int j = col_begin; j <= col_end; j++)
                m[j] = 0;
        }
    }
}

// Perspective warp of src into dst that falls back to prev_src near the image border.
//   prev_src    : image whose pixels are used where the warp has no reliable source pixel
//   src         : image to warp
//   dst         : output, (re)created as height x width CV_8UC1
//   M0          : 3x3 transformation; it is inverted first unless flags contains
//                 WARP_INVERSE_MAP, so that the matrix used below maps destination pixel
//                 coordinates to source coordinates
//   flags       : interpolation method (INTER_AREA is replaced by INTER_LINEAR)
//   borderType, borderValue : forwarded to remap
// The destination is processed in blocks; for each block a fixed-point coordinate map is
// built and passed to remap. A destination pixel whose source position lies within
// `margin` pixels of the border, or outside the image, is finally overwritten with the
// pixel of prev_src at the same position.
static void MyWarpPerspective(Mat& prev_src, Mat& src, Mat& dst, Mat& M0, int flags = INTER_LINEAR,
                             int borderType = BORDER_CONSTANT, const Scalar& borderValue = Scalar())
{
    int width = src.cols;
    int height = src.rows;
    dst.create( height, width, CV_8UC1 );

    // mask == 1 marks destination pixels with a source position safely inside the image
    Mat mask = Mat::zeros(height, width, CV_8UC1);
    const int margin = 5;

    // scratch buffers for one block: integer source coordinates and interpolation indices
    const int BLOCK_SZ = 32;
    short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];

    int interpolation = flags & INTER_MAX;
    if( interpolation == INTER_AREA )
        interpolation = INTER_LINEAR;

    // matM is a header over M, so converting and inverting fill the plain array M
    double M[9];
    Mat matM(3, 3, CV_64F, M);
    M0.convertTo(matM, matM.type());
    if( !(flags & WARP_INVERSE_MAP) )
         invert(matM, matM);

    int x, y, x1, y1;

    // block dimensions, chosen so that bw0*bh0 does not exceed BLOCK_SZ*BLOCK_SZ
    int bh0 = std::min(BLOCK_SZ/2, height);
    int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
    bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);

    for( y = 0; y < height; y += bh0 ) {
    for( x = 0; x < width; x += bw0 ) {
        // the last block of a row or column may be smaller
        int bw = std::min( bw0, width - x);
        int bh = std::min( bh0, height - y);

        Mat _XY(bh, bw, CV_16SC2, XY);
        // matA is not used below
        Mat matA;
        Mat dpart(dst, Rect(x, y, bw, bh));

        for( y1 = 0; y1 < bh; y1++ ) {

            short* xy = XY + y1*bw*2;
            // homogeneous source coordinates of the first pixel of this block row
            double X0 = M[0]*x + M[1]*(y + y1) + M[2];
            double Y0 = M[3]*x + M[4]*(y + y1) + M[5];
            double W0 = M[6]*x + M[7]*(y + y1) + M[8];
            short* alpha = A + y1*bw;

            for( x1 = 0; x1 < bw; x1++ ) {

                // perspective division, with the result scaled by INTER_TAB_SIZE
                double W = W0 + M[6]*x1;
                W = W ? INTER_TAB_SIZE/W : 0;
                double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
                double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));

                // source position in pixels
                double _X = fX/double(INTER_TAB_SIZE);
                double _Y = fY/double(INTER_TAB_SIZE);

                if( _X > margin && _X < width-1-margin && _Y > margin && _Y < height-1-margin )
                    mask.at<uchar>(y+y1, x+x1) = 1;

                int X = saturate_cast<int>(fX);
                int Y = saturate_cast<int>(fY);

                // high bits: integer pixel position; low bits: index built from the fractions
                xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
                xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
                alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (X & (INTER_TAB_SIZE-1)));
            }
        }

        Mat _matA(bh, bw, CV_16U, A);
        remap( src, dpart, _XY, _matA, interpolation, borderType, borderValue );
    }
    }

    // pixels without a reliable source position take the value of the previous frame
    for( y = 0; y < height; y++ ) {
        const uchar* m = mask.ptr<uchar>(y);
        const uchar* s = prev_src.ptr<uchar>(y);
        uchar* d = dst.ptr<uchar>(y);
        for( x = 0; x < width; x++ ) {
            if(m[x] == 0)
                d[x] = s[x];
        }
    }
}

void ComputeMatch(const std::vector<KeyPoint>& prev_kpts, const std::vector<KeyPoint>& kpts,
                  const Mat& prev_desc, const Mat& desc, std::vector<Point2f>& prev_pts, std::vector<Point2f>& pts)
{
    prev_pts.clear();
    pts.clear();

    if(prev_kpts.size() == 0 || kpts.size() == 0)
        return;

    Mat mask = windowedMatchingMask(kpts, prev_kpts, 25, 25);

    BFMatcher desc_matcher(NORM_L2);
    std::vector<DMatch> matches;

    desc_matcher.match(desc, prev_desc, matches, mask);

    prev_pts.reserve(matches.size());
    pts.reserve(matches.size());

    for(size_t i = 0; i < matches.size(); i++) {
        const DMatch& dmatch = matches[i];
        // get the point pairs that are successfully matched
        prev_pts.push_back(prev_kpts[dmatch.trainIdx].pt);
        pts.push_back(kpts[dmatch.queryIdx].pt);
    }

    return;
}

void MergeMatch(const std::vector<Point2f>& prev_pts1, const std::vector<Point2f>& pts1,
                const std::vector<Point2f>& prev_pts2, const std::vector<Point2f>& pts2,
                std::vector<Point2f>& prev_pts_all, std::vector<Point2f>& pts_all)
{
    prev_pts_all.clear();
    prev_pts_all.reserve(prev_pts1.size() + prev_pts2.size());

    pts_all.clear();
    pts_all.reserve(pts1.size() + pts2.size());

    for(size_t i = 0; i < prev_pts1.size(); i++) {
        prev_pts_all.push_back(prev_pts1[i]);
        pts_all.push_back(pts1[i]);
    }

    for(size_t i = 0; i < prev_pts2.size(); i++) {
        prev_pts_all.push_back(prev_pts2[i]);
        pts_all.push_back(pts2[i]); 
    }

    return;
}

void MatchFromFlow(const Mat& prev_grey, const Mat& flow, std::vector<Point2f>& prev_pts, std::vector<Point2f>& pts, const Mat& mask)
{
    int width = prev_grey.cols;
    int height = prev_grey.rows;
    prev_pts.clear();
    pts.clear();

    const int MAX_COUNT = 1000;
    goodFeaturesToTrack(prev_grey, prev_pts, MAX_COUNT, 0.001, 3, mask);

    if(prev_pts.size() == 0)
        return;

    for(int i = 0; i < prev_pts.size(); i++) {
        int x = std::min<int>(std::max<int>(cvRound(prev_pts[i].x), 0), width-1);
        int y = std::min<int>(std::max<int>(cvRound(prev_pts[i].y), 0), height-1);

        const float* f = flow.ptr<float>(y);
        pts.push_back(Point2f(x+f[2*x], y+f[2*x+1]));
    }
}

#endif /*DESCRIPTORS_H_*/

冬后晚晴

关注

1
点赞
踩
2

收藏

觉得还不错? 一键收藏
1
评论
Action Recognition－iDT代码解析

主要参考博文行为识别笔记：iDT算法用法与代码解析以及Improved Dense Trajectory用法及源码分析上一篇博文iDT论文笔记已经从原理上对iDT算法进行了介绍，本文将重点介绍其代码的使用及解析。源代码下载官网链接iDT代码代码结构iDT代码中主要包含以下几个代码文件： DenseTrackStab.cpp:iDT算法主程序 DenseTrackStab.h:轨迹跟
复制链接

扫一扫

专栏目录