【opencv450-samples】train_HOG.cpp 源码学习---- hog+svm

46 篇文章 4 订阅

#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/ml.hpp"
#include "opencv2/objdetect.hpp"
#include "opencv2/videoio.hpp"
#include <iostream>
#include <time.h>
using namespace cv;
using namespace cv::ml;
using namespace std;
// system("chcp 65001");

// Forward declarations of the helpers defined later in this file.
// Extracts the coefficient vector (weights followed by the bias) from a trained SVM.
vector< float > get_svm_detector(const Ptr< SVM >& svm);
// Packs a list of row/column vector samples into one CV_32FC1 matrix, one sample per row.
void convert_to_ml(const std::vector< Mat >& train_samples, Mat& trainData);
// Reads every image matched by glob(dirname) and appends the readable ones to img_lst.
void load_images(const String& dirname, vector< Mat >& img_lst, bool showImages);
// Crops one randomly placed size-sized patch out of each sufficiently large negative image.
void sample_neg(const vector< Mat >& full_neg_lst, vector< Mat >& neg_lst, const Size& size);
// Computes HOG descriptors of the centred wsize window of each image (optionally also mirrored).
void computeHOGs(const Size wsize, const vector< Mat >& img_lst, vector< Mat >& gradient_lst, bool use_flip);
// Loads a saved detector and displays its detections on a video stream or on a directory of images.
void test_trained_detector(String obj_det_filename, String test_dir, String videofilename);
// Builds the detector coefficient vector of a trained SVM.
//
// svm: trained model; it must expose exactly one support vector of type CV_32F
//      and a decision function with a single alpha equal to 1 (asserted below).
// Returns a vector of sv.cols + 1 floats: the support-vector weights followed
// by the bias term -rho. main() hands this vector to HOGDescriptor::setSVMDetector.
vector< float > get_svm_detector(const Ptr< SVM >& svm)
{
    // Get the support vectors.
    Mat sv = svm->getSupportVectors();
    const int sv_total = sv.rows; // number of support vectors
    // Get the decision function: alpha weights, support-vector indices and offset rho.
    Mat alpha, svidx;
    double rho = svm->getDecisionFunction(0, alpha, svidx);

    // The copy below is only valid for a single support vector with unit weight.
    CV_Assert(alpha.total() == 1 && svidx.total() == 1 && sv_total == 1);
    CV_Assert((alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
        (alpha.type() == CV_32F && alpha.at<float>(0) == 1.f));
    CV_Assert(sv.type() == CV_32F);

    vector< float > hog_detector(sv.cols + 1); // weights + bias
    // Copy the sv.cols float weights from the support-vector row into the output.
    memcpy(&hog_detector[0], sv.ptr(), sv.cols * sizeof(hog_detector[0]));
    hog_detector[sv.cols] = (float)-rho; // bias
    return hog_detector;
}

/*
* Convert training/testing set to be used by OpenCV Machine Learning algorithms.
* TrainData is a matrix of size (#samples x max(#cols,#rows) per samples), in 32FC1.
* Transposition of samples are made if needed.
* 转换训练/测试集以供 OpenCV 机器学习算法使用。
* TrainData 是一个大小为(每个样本的#samples x max(#cols,#rows))的矩阵, 32FC1 类型。
* 如有需要,可进行样本调换。
*/
//构造训练数据
void convert_to_ml(const vector< Mat >& train_samples, Mat& trainData)
{
    //--转换数据
    const int rows = (int)train_samples.size();//样本数
    const int cols = (int)std::max(train_samples[0].cols, train_samples[0].rows);//列数:train_samples可能以列向量或者行向量形式存在
    Mat tmp(1, cols, CV_32FC1); //< 如果需要,用于转置used for transposition if needed
    trainData = Mat(rows, cols, CV_32FC1);//初始化训练数据:  CV_32FC1----灰度图

    for (size_t i = 0; i < train_samples.size(); ++i)
    {
        CV_Assert(train_samples[i].cols == 1 || train_samples[i].rows == 1);//每个样本数据要么是列向量要么是行向量

        if (train_samples[i].cols == 1)
        {
            transpose(train_samples[i], tmp);//转置列向量
            tmp.copyTo(trainData.row((int)i));//添加到训练数据
        }
        else if (train_samples[i].rows == 1)
        {
            train_samples[i].copyTo(trainData.row((int)i));//添加到训练数据
        }
    }
}
//加载图像  :目录路径 , Mat向量, 是否显示
void load_images(const String& dirname, vector< Mat >& img_lst, bool showImages = false)
{
    vector< String > files;//图片路径列表
    glob(dirname, files);//读取文件夹下的所有文件

    for (size_t i = 0; i < files.size(); ++i)
    {
        Mat img = imread(files[i]); // 加载图像
        if (img.empty())
        {
            cout << files[i] << " is invalid!" << endl; //无效图像,跳过 invalid image, skip it.
            continue;
        }

        if (showImages)
        {
            imshow("image", img);//显示图像
            waitKey(1);
        }
        img_lst.push_back(img);//添加到图像向量
    }
}
//采样负样本   : 完整负样本列表,裁剪的负样本列表,矩形框ROI尺寸
void sample_neg(const vector< Mat >& full_neg_lst, vector< Mat >& neg_lst, const Size& size)
{
    Rect box;//ROI矩形框
    box.width = size.width;
    box.height = size.height;

    srand((unsigned int)time(NULL));

    for (size_t i = 0; i < full_neg_lst.size(); i++)//遍历完整负样本向量
        if (full_neg_lst[i].cols > box.width && full_neg_lst[i].rows > box.height)//
        {
            box.x = rand() % (full_neg_lst[i].cols - box.width);//随机矩形框x
            box.y = rand() % (full_neg_lst[i].rows - box.height);//随机矩形框y
            Mat roi = full_neg_lst[i](box);//取负样本的随机矩形框ROI图像
            neg_lst.push_back(roi.clone());//从负样本中裁剪的ROI图像,添加到负样本向量
        }
}
//计算HOG描述子   hog窗口尺寸,
void computeHOGs(const Size wsize, const vector< Mat >& img_lst, vector< Mat >& gradient_lst, bool use_flip)
{
    HOGDescriptor hog;//hog描述子
    hog.winSize = wsize;//hog窗口尺寸
    Mat gray;//灰度图
    vector< float > descriptors;//描述子向量

    for (size_t i = 0; i < img_lst.size(); i++)//遍历样本列表
    {
        if (img_lst[i].cols >= wsize.width && img_lst[i].rows >= wsize.height)//正样本>   负样本=
        {
            Rect r = Rect((img_lst[i].cols - wsize.width) / 2,
                (img_lst[i].rows - wsize.height) / 2,
                wsize.width,
                wsize.height);//从Mat图像中,取正中间hog窗口尺寸的矩形区域
            cvtColor(img_lst[i](r), gray, COLOR_BGR2GRAY);//hog矩形区域灰度图
            hog.compute(gray, descriptors, Size(8, 8), Size(8, 8));//计算灰度图hog描述子. 换了cuda 出错?
            gradient_lst.push_back(Mat(descriptors).clone());//添加灰度图hog描述子到梯度向量列表中
            if (use_flip)//
            {
                flip(gray, gray, 1);//翻转灰度图
                hog.compute(gray, descriptors, Size(8, 8), Size(0, 0));//计算翻转后灰度图的hog描述子
                gradient_lst.push_back(Mat(descriptors).clone());//添加到梯度向量列表中
            }
        }
    }
}
// Runs a saved HOG detector interactively.
//
// obj_det_filename: file previously written by HOGDescriptor::save.
// test_dir:         directory/pattern of test images, used when no video source is open.
// videofilename:    video file name, or a single digit selecting a camera index;
//                   empty to use the test images.
// Each frame/image is shown with its detections drawn in green (brighter for a
// larger weight). The function returns when the input is exhausted, when ESC
// is pressed, or immediately if the detector file cannot be loaded.
void test_trained_detector(String obj_det_filename, String test_dir, String videofilename)
{
    cout << "Testing trained detector..." << endl;
    HOGDescriptor hog;
    if (!hog.load(obj_det_filename))
    {
        // Without a loaded model there is nothing to test; report it instead of
        // silently displaying images with no detections.
        cerr << "Could not load detector from " << obj_det_filename << endl;
        return;
    }

    vector< String > files;
    glob(test_dir, files);

    int delay = 0; // 0 = wait for a key press between still images
    VideoCapture cap;

    if (videofilename != "")
    {
        // isdigit() requires a value representable as unsigned char.
        if (videofilename.size() == 1 && isdigit((unsigned char)videofilename[0]))
            cap.open(videofilename[0] - '0'); // single digit: camera index
        else
            cap.open(videofilename);
    }

    obj_det_filename = "testing " + obj_det_filename; // reused as the window title
    namedWindow(obj_det_filename, WINDOW_NORMAL);

    for (size_t i = 0;; i++)
    {
        Mat img;

        if (cap.isOpened())
        {
            cap >> img;
            delay = 1; // video mode: do not block on every frame
        }
        else if (i < files.size())
        {
            img = imread(files[i]);
        }

        // End of video, end of the file list, or an unreadable image.
        if (img.empty())
        {
            return;
        }

        vector< Rect > detections;
        vector< double > foundWeights;

        hog.detectMultiScale(img, detections, foundWeights);
        for (size_t j = 0; j < detections.size(); j++)
        {
            Scalar color = Scalar(0, foundWeights[j] * foundWeights[j] * 200, 0);
            rectangle(img, detections[j], color, img.cols / 400 + 1);
        }

        imshow(obj_det_filename, img);

        if (waitKey(delay) == 27) // ESC
        {
            return;
        }
    }
}

// Entry point: trains a HOG + SVM detector from positive/negative image
// directories, saves it to a file and then runs it on the test data.
// With -t the training is skipped and an already saved detector is tested.
// Returns 0 on success and 1 when no positive image could be loaded; the
// help and argument-error paths terminate through exit().
int main(int argc, char** argv)
{

    // Command-line keys in CommandLineParser format: {name | default | description}.
    const char* keys =
    {
        "{help h|     | show help message}"
        "{pd    |  D:/test/Positive_Sample| path of directory contains positive images}"
        "{nd    |  D:/test/Negative_Sample | path of directory contains negative images }"
        "{td    |   D:/test/blocks/test  | path of directory contains test images}"
        "{tv    |     | test video file name}"
        "{dw    |  512   | width of the detector}"
        "{dh    |  512   | height of the detector}"
        "{f     |false| indicates if the program will generate and use mirrored samples or not}"
        "{d     |false| train twice}"
        "{t     |false| test a trained detector}"
        "{v     |false| visualize training steps}"
        "{fn    |my_detector.yml| file name of trained SVM}"
    };

    CommandLineParser parser(argc, argv, keys);

    if (parser.has("help"))
    {
        parser.printMessage();
        exit(0);
    }

    String pos_dir = parser.get< String >("pd"); // directory of positive samples
    String neg_dir = parser.get< String >("nd"); // directory of negative samples
    String test_dir = parser.get< String >("td"); // directory of test images
    String obj_det_filename = parser.get< String >("fn"); // file the detector is saved to / loaded from
    String videofilename = parser.get< String >("tv"); // optional test video
    int detector_width = parser.get< int >("dw"); // detector window width
    int detector_height = parser.get< int >("dh"); // detector window height
    bool test_detector = parser.get< bool >("t"); // only test an existing detector
    bool train_twice = parser.get< bool >("d"); // retrain with detections found on the negatives
    bool visualization = parser.get< bool >("v"); // show images while processing
    bool flip_samples = parser.get< bool >("f"); // also train on mirrored samples

    // Test-only mode: no training is performed.
    if (test_detector)
    {
        test_trained_detector(obj_det_filename, test_dir, videofilename);
        exit(0);
    }

    // Both sample directories are required for training.
    if (pos_dir.empty() || neg_dir.empty())
    {
        parser.printMessage();
        cout << "参数数量错误Wrong number of parameters.\n\n"
            << "Example command line:\n" << argv[0] << " -dw=64 -dh=128 -pd=/INRIAPerson/96X160H96/Train/pos -nd=/INRIAPerson/neg -td=/INRIAPerson/Test/pos -fn=HOGpedestrian64x128.xml -d\n"
            << "\nExample command line for testing trained detector:\n" << argv[0] << " -t -fn=HOGpedestrian64x128.xml -td=/INRIAPerson/Test/pos";
        exit(1);
    }

    vector< Mat > pos_lst, full_neg_lst, neg_lst, gradient_lst;
    vector< int > labels;

    clog << "Positive images are being loaded...";
    load_images(pos_dir, pos_lst, visualization);
    if (pos_lst.size() > 0)
    {
        clog << "...[done] " << pos_lst.size() << " files." << endl;
    }
    else
    {
        clog << "no image in " << pos_dir << endl;
        return 1;
    }

    // Default to the size of the first positive image.
    Size pos_image_size = pos_lst[0].size();

    if (detector_width && detector_height)
    {
        // An explicit detector size was given: resize every positive sample to it.
        pos_image_size = Size(detector_width, detector_height);
        for (size_t i = 0; i < pos_lst.size(); ++i)
        {
            resize(pos_lst[i], pos_lst[i], pos_image_size);
        }
    }
    else
    {
        // No detector size: bring every positive sample to the size of the first one.
        for (size_t i = 0; i < pos_lst.size(); ++i)
        {
            if (pos_lst[i].size() != pos_image_size)
            {
                resize(pos_lst[i], pos_lst[i], pos_image_size);
                // Disabled alternative: reject differently sized positives instead of resizing.
                //cout << "All positive images should be same size!" << endl;
                //exit(1);
            }
        }
        // pos_image_size = pos_image_size / 8 * 8;
    }

    clog << "Negative images are being loaded...";
    load_images(neg_dir, full_neg_lst, visualization);
    clog << "...[done] " << full_neg_lst.size() << " files." << endl;

    clog << "Negative images are being processed...";
    sample_neg(full_neg_lst, neg_lst, pos_image_size); // one random window-sized patch per negative
    clog << "...[done] " << neg_lst.size() << " files." << endl;

    clog << "Histogram of Gradients are being calculated for positive images...";
    computeHOGs(pos_image_size, pos_lst, gradient_lst, flip_samples);
    size_t positive_count = gradient_lst.size(); // descriptors produced by the positives
    labels.assign(positive_count, +1); // positives are labelled +1
    clog << "...[done] ( positive images count : " << positive_count << " )" << endl;

    clog << "Histogram of Gradients are being calculated for negative images...";
    computeHOGs(pos_image_size, neg_lst, gradient_lst, flip_samples); // appended after the positives
    size_t negative_count = gradient_lst.size() - positive_count;
    labels.insert(labels.end(), negative_count, -1); // negatives are labelled -1
    CV_Assert(positive_count < labels.size()); // at least one negative descriptor is required
    clog << "...[done] ( negative images count : " << negative_count << " )" << endl;

    Mat train_data;
    convert_to_ml(gradient_lst, train_data); // one descriptor per row

    clog << "Training SVM...";
    Ptr< SVM > svm = SVM::create();
    /* Default values to train SVM */
    svm->setCoef0(0.0);
    svm->setDegree(3);
    svm->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 1e-3));
    svm->setGamma(0);
    svm->setKernel(SVM::LINEAR);
    svm->setNu(0.5);
    svm->setP(0.1); // for EPSILON_SVR, epsilon in loss function?
    svm->setC(0.01); // From paper, soft classifier
    svm->setType(SVM::EPS_SVR); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
    svm->train(train_data, ROW_SAMPLE, labels);
    clog << "...[done]" << endl;

    if (train_twice)
    {
        // Second pass: everything the first detector finds on the negative
        // images is added to the negative set, then the SVM is retrained.
        clog << "Testing trained detector on negative images. This might take a few minutes...";
        HOGDescriptor my_hog;
        my_hog.winSize = pos_image_size;

        // Set the trained svm to my_hog
        my_hog.setSVMDetector(get_svm_detector(svm));

        vector< Rect > detections;
        vector< double > foundWeights;

        for (size_t i = 0; i < full_neg_lst.size(); i++)
        {
            // Only images at least as large as the detection window are scanned.
            if (full_neg_lst[i].cols >= pos_image_size.width && full_neg_lst[i].rows >= pos_image_size.height)
                my_hog.detectMultiScale(full_neg_lst[i], detections, foundWeights);
            else
                detections.clear();

            for (size_t j = 0; j < detections.size(); j++)
            {
                // NOTE(review): assumes every detection rectangle lies inside the image;
                // confirm detectMultiScale cannot return a rectangle crossing the border.
                Mat detection = full_neg_lst[i](detections[j]).clone();
                resize(detection, detection, pos_image_size, 0, 0, INTER_LINEAR_EXACT);
                neg_lst.push_back(detection);
            }

            if (visualization)
            {
                // The rectangles are drawn directly onto the stored negative image.
                for (size_t j = 0; j < detections.size(); j++)
                {
                    rectangle(full_neg_lst[i], detections[j], Scalar(0, 255, 0), 2);
                }
                imshow("testing trained detector on negative images", full_neg_lst[i]);
                waitKey(5);
            }
        }
        clog << "...[done]" << endl;

        // Recompute all descriptors with the enlarged negative set.
        gradient_lst.clear();
        clog << "Histogram of Gradients are being calculated for positive images...";
        computeHOGs(pos_image_size, pos_lst, gradient_lst, flip_samples);
        positive_count = gradient_lst.size();
        clog << "...[done] ( positive count : " << positive_count << " )" << endl;

        clog << "Histogram of Gradients are being calculated for negative images...";
        computeHOGs(pos_image_size, neg_lst, gradient_lst, flip_samples);
        negative_count = gradient_lst.size() - positive_count;
        clog << "...[done] ( negative count : " << negative_count << " )" << endl;

        labels.clear();
        labels.assign(positive_count, +1);
        labels.insert(labels.end(), negative_count, -1);

        clog << "Training SVM again...";
        convert_to_ml(gradient_lst, train_data);
        svm->train(train_data, ROW_SAMPLE, labels);
        clog << "...[done]" << endl;
    }

    // Store the trained coefficients in a HOG descriptor and write it to disk.
    HOGDescriptor hog;
    hog.winSize = pos_image_size;
    hog.setSVMDetector(get_svm_detector(svm));
    hog.save(obj_det_filename);

    // Finally run the freshly saved detector on the test data.
    test_trained_detector(obj_det_filename, test_dir, videofilename);

    return 0;
}

 

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值