#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/ml.hpp"
#include "opencv2/objdetect.hpp"
#include "opencv2/videoio.hpp"
#include <iostream>
#include <time.h>
using namespace cv;
using namespace cv::ml;
using namespace std;
// system("chcp 65001");
vector< float > get_svm_detector(const Ptr< SVM >& svm);
void convert_to_ml(const std::vector< Mat >& train_samples, Mat& trainData);
void load_images(const String& dirname, vector< Mat >& img_lst, bool showImages);
void sample_neg(const vector< Mat >& full_neg_lst, vector< Mat >& neg_lst, const Size& size);
void computeHOGs(const Size wsize, const vector< Mat >& img_lst, vector< Mat >& gradient_lst, bool use_flip);
void test_trained_detector(String obj_det_filename, String test_dir, String videofilename);
/*
 * Build the coefficient vector for HOGDescriptor::setSVMDetector from a trained SVM.
 *
 * The function asserts that the SVM holds exactly one support vector of type
 * CV_32F and that decision function 0 has a single alpha equal to 1.
 *
 * Returns a vector of sv.cols + 1 floats: the support vector followed by the
 * bias term (-rho).
 */
vector< float > get_svm_detector(const Ptr< SVM >& svm)
{
    // Get the support vectors.
    Mat sv = svm->getSupportVectors();
    const int sv_total = sv.rows; // number of support vectors

    // Get the decision function.
    Mat alpha, svidx;
    double rho = svm->getDecisionFunction(0, alpha, svidx);

    CV_Assert(alpha.total() == 1 && svidx.total() == 1 && sv_total == 1);
    CV_Assert((alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
              (alpha.type() == CV_32F && alpha.at<float>(0) == 1.f));
    CV_Assert(sv.type() == CV_32F);

    vector< float > hog_detector(sv.cols + 1); // coefficients + bias
    // Copy the sv.cols floats of the support vector row into the detector.
    memcpy(&hog_detector[0], sv.ptr(), sv.cols * sizeof(hog_detector[0]));
    hog_detector[sv.cols] = static_cast<float>(-rho); // bias
    return hog_detector;
}
/*
* Convert training/testing set to be used by OpenCV Machine Learning algorithms.
* TrainData is a matrix of size (#samples x max(#cols,#rows) per samples), in 32FC1.
* Transposition of samples are made if needed.
* 转换训练/测试集以供 OpenCV 机器学习算法使用。
* TrainData 是一个大小为(每个样本的#samples x max(#cols,#rows))的矩阵, 32FC1 类型。
* 如有需要,可进行样本调换。
*/
//构造训练数据
void convert_to_ml(const vector< Mat >& train_samples, Mat& trainData)
{
//--转换数据
const int rows = (int)train_samples.size();//样本数
const int cols = (int)std::max(train_samples[0].cols, train_samples[0].rows);//列数:train_samples可能以列向量或者行向量形式存在
Mat tmp(1, cols, CV_32FC1); //< 如果需要,用于转置used for transposition if needed
trainData = Mat(rows, cols, CV_32FC1);//初始化训练数据: CV_32FC1----灰度图
for (size_t i = 0; i < train_samples.size(); ++i)
{
CV_Assert(train_samples[i].cols == 1 || train_samples[i].rows == 1);//每个样本数据要么是列向量要么是行向量
if (train_samples[i].cols == 1)
{
transpose(train_samples[i], tmp);//转置列向量
tmp.copyTo(trainData.row((int)i));//添加到训练数据
}
else if (train_samples[i].rows == 1)
{
train_samples[i].copyTo(trainData.row((int)i));//添加到训练数据
}
}
}
//加载图像 :目录路径 , Mat向量, 是否显示
void load_images(const String& dirname, vector< Mat >& img_lst, bool showImages = false)
{
vector< String > files;//图片路径列表
glob(dirname, files);//读取文件夹下的所有文件
for (size_t i = 0; i < files.size(); ++i)
{
Mat img = imread(files[i]); // 加载图像
if (img.empty())
{
cout << files[i] << " is invalid!" << endl; //无效图像,跳过 invalid image, skip it.
continue;
}
if (showImages)
{
imshow("image", img);//显示图像
waitKey(1);
}
img_lst.push_back(img);//添加到图像向量
}
}
//采样负样本 : 完整负样本列表,裁剪的负样本列表,矩形框ROI尺寸
void sample_neg(const vector< Mat >& full_neg_lst, vector< Mat >& neg_lst, const Size& size)
{
Rect box;//ROI矩形框
box.width = size.width;
box.height = size.height;
srand((unsigned int)time(NULL));
for (size_t i = 0; i < full_neg_lst.size(); i++)//遍历完整负样本向量
if (full_neg_lst[i].cols > box.width && full_neg_lst[i].rows > box.height)//
{
box.x = rand() % (full_neg_lst[i].cols - box.width);//随机矩形框x
box.y = rand() % (full_neg_lst[i].rows - box.height);//随机矩形框y
Mat roi = full_neg_lst[i](box);//取负样本的随机矩形框ROI图像
neg_lst.push_back(roi.clone());//从负样本中裁剪的ROI图像,添加到负样本向量
}
}
//计算HOG描述子 hog窗口尺寸,
void computeHOGs(const Size wsize, const vector< Mat >& img_lst, vector< Mat >& gradient_lst, bool use_flip)
{
HOGDescriptor hog;//hog描述子
hog.winSize = wsize;//hog窗口尺寸
Mat gray;//灰度图
vector< float > descriptors;//描述子向量
for (size_t i = 0; i < img_lst.size(); i++)//遍历样本列表
{
if (img_lst[i].cols >= wsize.width && img_lst[i].rows >= wsize.height)//正样本> 负样本=
{
Rect r = Rect((img_lst[i].cols - wsize.width) / 2,
(img_lst[i].rows - wsize.height) / 2,
wsize.width,
wsize.height);//从Mat图像中,取正中间hog窗口尺寸的矩形区域
cvtColor(img_lst[i](r), gray, COLOR_BGR2GRAY);//hog矩形区域灰度图
hog.compute(gray, descriptors, Size(8, 8), Size(8, 8));//计算灰度图hog描述子. 换了cuda 出错?
gradient_lst.push_back(Mat(descriptors).clone());//添加灰度图hog描述子到梯度向量列表中
if (use_flip)//
{
flip(gray, gray, 1);//翻转灰度图
hog.compute(gray, descriptors, Size(8, 8), Size(0, 0));//计算翻转后灰度图的hog描述子
gradient_lst.push_back(Mat(descriptors).clone());//添加到梯度向量列表中
}
}
}
}
/*
 * Run a saved HOG detector on a video stream or on the images of a directory
 * and display the detections.
 *
 * obj_det_filename - file the HOGDescriptor is loaded from; also used in the
 *                    window title.
 * test_dir         - pattern/directory passed to glob() for the test images.
 * videofilename    - empty for image mode; a single digit opens the camera
 *                    with that index, anything else is opened as a video file.
 *
 * Returns when the detector cannot be loaded, when no further frame/image is
 * available, or when ESC (key code 27) is pressed.
 */
void test_trained_detector(String obj_det_filename, String test_dir, String videofilename)
{
    cout << "Testing trained detector..." << endl;

    HOGDescriptor hog;
    // Do not continue with a detector that failed to load.
    if (!hog.load(obj_det_filename))
    {
        cout << "Could not load detector from " << obj_det_filename << endl;
        return;
    }

    vector< String > files;
    glob(test_dir, files);

    int delay = 0;
    VideoCapture cap;
    if (videofilename != "")
    {
        // isdigit() requires a value representable as unsigned char.
        if (videofilename.size() == 1 && isdigit((unsigned char)videofilename[0]))
            cap.open(videofilename[0] - '0'); // camera index
        else
            cap.open(videofilename);
    }

    obj_det_filename = "testing " + obj_det_filename; // window title
    namedWindow(obj_det_filename, WINDOW_NORMAL);

    for (size_t i = 0;; i++)
    {
        Mat img;
        if (cap.isOpened())
        {
            // Video mode: grab the next frame and do not block on waitKey.
            cap >> img;
            delay = 1;
        }
        else if (i < files.size())
        {
            // Image mode: waitKey(0) below waits for a key press per image.
            img = imread(files[i]);
        }

        if (img.empty())
        {
            return;
        }

        vector< Rect > detections;
        vector< double > foundWeights;
        hog.detectMultiScale(img, detections, foundWeights);

        for (size_t j = 0; j < detections.size(); j++)
        {
            // Green channel grows with the squared detection weight.
            Scalar color = Scalar(0, foundWeights[j] * foundWeights[j] * 200, 0);
            rectangle(img, detections[j], color, img.cols / 400 + 1);
        }

        imshow(obj_det_filename, img);
        if (waitKey(delay) == 27)
        {
            return;
        }
    }
}
/*
 * Entry point.
 * With -t only a previously saved detector (-fn) is tested. Otherwise positive
 * and negative images are loaded, their HOG descriptors are computed, an SVM is
 * trained on them (optionally a second time with the detections found on the
 * negative images added as extra negatives, -d), the resulting detector is
 * saved to -fn and finally tested on the test images / video.
 */
int main(int argc, char** argv)
{
// Command line options (name | default | description) for CommandLineParser.
const char* keys =
{
"{help h| | show help message}"
"{pd | D:/test/Positive_Sample| path of directory contains positive images}"
"{nd | D:/test/Negative_Sample | path of directory contains negative images }"
"{td | D:/test/blocks/test | path of directory contains test images}"
"{tv | | test video file name}"
"{dw | 512 | width of the detector}"
"{dh | 512 | height of the detector}"
"{f |false| indicates if the program will generate and use mirrored samples or not}"
"{d |false| train twice}"
"{t |false| test a trained detector}"
"{v |false| visualize training steps}"
"{fn |my_detector.yml| file name of trained SVM}"
};
CommandLineParser parser(argc, argv, keys);
if (parser.has("help"))
{
parser.printMessage();
exit(0);
}
String pos_dir = parser.get< String >("pd");// directory of positive samples
String neg_dir = parser.get< String >("nd");// directory of negative samples
String test_dir = parser.get< String >("td");// directory of test images
String obj_det_filename = parser.get< String >("fn");// file the detector is saved to / loaded from
String videofilename = parser.get< String >("tv");// test video file
int detector_width = parser.get< int >("dw");// detector width
int detector_height = parser.get< int >("dh");// detector height
bool test_detector = parser.get< bool >("t");// only test an existing detector
bool train_twice = parser.get< bool >("d");// train a second time
bool visualization = parser.get< bool >("v");// show images while processing
bool flip_samples = parser.get< bool >("f");// also use mirrored samples
if (test_detector)
{
test_trained_detector(obj_det_filename, test_dir, videofilename);// test the saved detector and quit
exit(0);
}
if (pos_dir.empty() || neg_dir.empty())// both sample directories are required for training
{
parser.printMessage();
cout << "参数数量错误Wrong number of parameters.\n\n"
<< "Example command line:\n" << argv[0] << " -dw=64 -dh=128 -pd=/INRIAPerson/96X160H96/Train/pos -nd=/INRIAPerson/neg -td=/INRIAPerson/Test/pos -fn=HOGpedestrian64x128.xml -d\n"
<< "\nExample command line for testing trained detector:\n" << argv[0] << " -t -fn=HOGpedestrian64x128.xml -td=/INRIAPerson/Test/pos";
exit(1);
}
vector< Mat > pos_lst, full_neg_lst, neg_lst, gradient_lst;
vector< int > labels;
clog << "Positive images are being loaded...";
load_images(pos_dir, pos_lst, visualization);// load the positive samples
if (pos_lst.size() > 0)
{
clog << "...[done] " << pos_lst.size() << " files." << endl;
}
else
{
clog << "no image in " << pos_dir << endl;
return 1;
}
Size pos_image_size = pos_lst[0].size();// size of the first positive sample
if (detector_width && detector_height)
{
// An explicit detector size was given: resize every positive sample to it.
pos_image_size = Size(detector_width, detector_height);
for (size_t i = 0; i < pos_lst.size(); ++i)
{
resize(pos_lst[i], pos_lst[i], pos_image_size); // make all positive samples the same size
}
}
else
{
// No detector size given: use the size of the first positive sample for all of them.
for (size_t i = 0; i < pos_lst.size(); ++i)
{
if (pos_lst[i].size() != pos_image_size)
{
resize(pos_lst[i], pos_lst[i], pos_image_size); // make all positive samples the same size
//cout << "All positive images should be same size!" << endl;
//exit(1);
}
}
// pos_image_size = pos_image_size / 8 * 8;
}
clog << "Negative images are being loaded...";
load_images(neg_dir, full_neg_lst, visualization);// load the negative images
clog << "...[done] " << full_neg_lst.size() << " files." << endl;
clog << "Negative images are being processed...";
sample_neg(full_neg_lst, neg_lst, pos_image_size);// crop one random window per negative image
clog << "...[done] " << neg_lst.size() << " files." << endl;
clog << "Histogram of Gradients are being calculated for positive images...";
computeHOGs(pos_image_size, pos_lst, gradient_lst, flip_samples);// HOG descriptors of the positive samples
size_t positive_count = gradient_lst.size();// number of positive descriptors
labels.assign(positive_count, +1);// positive samples get label +1
clog << "...[done] ( positive images count : " << positive_count << " )" << endl;
clog << "Histogram of Gradients are being calculated for negative images...";
computeHOGs(pos_image_size, neg_lst, gradient_lst, flip_samples);// negative descriptors are appended to gradient_lst
size_t negative_count = gradient_lst.size() - positive_count;// number of negative descriptors
labels.insert(labels.end(), negative_count, -1);// negative samples get label -1
// labels holds positives + negatives, so this requires at least one negative descriptor.
CV_Assert(positive_count < labels.size());
clog << "...[done] ( negative images count : " << negative_count << " )" << endl;
Mat train_data;
convert_to_ml(gradient_lst, train_data);// one descriptor per row
clog << "Training SVM...";
Ptr< SVM > svm = SVM::create();// create the support vector machine
/* Default values to train SVM */
svm->setCoef0(0.0);
svm->setDegree(3);
svm->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 1e-3));
svm->setGamma(0);
svm->setKernel(SVM::LINEAR);
svm->setNu(0.5);
svm->setP(0.1); // for EPSILON_SVR, epsilon in loss function?
svm->setC(0.01); // From paper, soft classifier
svm->setType(SVM::EPS_SVR); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
svm->train(train_data, ROW_SAMPLE, labels);// train the SVM
clog << "...[done]" << endl;
if (train_twice)
{
// Second pass: run the first detector on the full negative images and add
// every detection to neg_lst as an additional negative sample, then retrain.
clog << "Testing trained detector on negative images. This might take a few minutes...";
HOGDescriptor my_hog;
my_hog.winSize = pos_image_size;
// Set the trained svm to my_hog
my_hog.setSVMDetector(get_svm_detector(svm));
vector< Rect > detections;
vector< double > foundWeights;
for (size_t i = 0; i < full_neg_lst.size(); i++)
{
// Images smaller than the detector window are not searched.
if (full_neg_lst[i].cols >= pos_image_size.width && full_neg_lst[i].rows >= pos_image_size.height)
my_hog.detectMultiScale(full_neg_lst[i], detections, foundWeights);
else
detections.clear();
for (size_t j = 0; j < detections.size(); j++)
{
// Crop the detection, bring it to the training size and keep it as a negative sample.
Mat detection = full_neg_lst[i](detections[j]).clone();
resize(detection, detection, pos_image_size, 0, 0, INTER_LINEAR_EXACT);
neg_lst.push_back(detection);
}
if (visualization)
{
// Note: the rectangles are drawn into the stored negative image itself.
for (size_t j = 0; j < detections.size(); j++)
{
rectangle(full_neg_lst[i], detections[j], Scalar(0, 255, 0), 2);
}
imshow("testing trained detector on negative images", full_neg_lst[i]);
waitKey(5);
}
}
clog << "...[done]" << endl;
// Recompute all descriptors and labels from scratch with the enlarged negative set.
gradient_lst.clear();
clog << "Histogram of Gradients are being calculated for positive images...";
computeHOGs(pos_image_size, pos_lst, gradient_lst, flip_samples);
positive_count = gradient_lst.size();
clog << "...[done] ( positive count : " << positive_count << " )" << endl;
clog << "Histogram of Gradients are being calculated for negative images...";
computeHOGs(pos_image_size, neg_lst, gradient_lst, flip_samples);
negative_count = gradient_lst.size() - positive_count;
clog << "...[done] ( negative count : " << negative_count << " )" << endl;
labels.clear();
labels.assign(positive_count, +1);
labels.insert(labels.end(), negative_count, -1);
clog << "Training SVM again...";
convert_to_ml(gradient_lst, train_data);
svm->train(train_data, ROW_SAMPLE, labels);
clog << "...[done]" << endl;
}
// Store the trained SVM as the detector of a HOGDescriptor, save it and test it.
HOGDescriptor hog;
hog.winSize = pos_image_size;
hog.setSVMDetector(get_svm_detector(svm));
hog.save(obj_det_filename);
test_trained_detector(obj_det_filename, test_dir, videofilename);
return 0;
}