1999年,大不列颠哥伦比亚大学的David G.Lowe教授在现有基于不变量技术的特征检测方法基础上,提出了一种基于尺度空间的,对图像缩放、旋转、甚至仿射变换保持不变性的图像局部特征描述算子——Sift(尺度不变特征变换),全称是Scale Invariant Feature Transform,并且在2004年,又对Sift算法做了更为系统的完善。
下边这位老爷子就是Lowe。
Sift提取图像的局部特征,在尺度空间寻找极值点,并提取出其位置、尺度、方向信息。Sift的应用范围包括物体辨别、机器人地图感知与导航、影像拼接、3D模型建立、手势识别、影像追踪等。
Sift特征的特点:
1. 对旋转、尺度缩放、亮度变化保持不变性,对视角变化、噪声等也存在一定程度的稳定性;
2. 独特性,信息量丰富,适用于在海量特征数据中进行快速,准确的匹配;
3. 多量性,即使少数几个物体也可以产生大量的Sift特征向量;
4. 可扩展性,可以很方便的与其他形式的特征向量进行联合;
本文不再介绍其它算法,自行搜索它们之间的差异、优劣。
为什么选择sift-sift-flann组合?
结合上面所述的 SIFT 特征的特点,如果需求场景的识别目标的角度、尺度、亮度变化不大,那么 SIFT 能检测出特征点是最多的,目标区域的识别率会更高,但是速度很慢。
SURF 算法采用了很多方法来对每一步进行优化从而提高速度。有分析显示在结果效果相当的情况下 SURF 的速度是 SIFT 的 3 倍。SURF 善于处理具有模糊和旋转的图像,但是不善于处理视角变化和光照变化。
项目所需:准确性第一,速度第二,测试识别率在99%以上。记录了视频每帧识别到的目标区域顶点坐标。
代码:
// Append one line of text to the global `location_out` result stream.
// Guarded by `m_io_monitor` so records written from concurrent worker
// threads never interleave.
// NOTE(review): assumes `location_out` was opened by ReadVideo — verify.
// Fix: take the string by const reference instead of by value (the old
// signature copied the whole record on every call).
static void write_file(const std::string& text)
{
    boost::mutex::scoped_lock lock(m_io_monitor);
    location_out << text << std::endl;
}
// Dump the "location" result file to stdout, line by line.
// A missing or unreadable file behaves as an empty file: getline()
// fails immediately and nothing is printed.
static void read_file()
{
    std::ifstream infile("location");
    std::string line;
    while (std::getline(infile, line)) {
        std::cout << line << std::endl;
    }
    // The explicit infile.close() was removed: std::ifstream closes
    // itself in its destructor (RAII).
}
// Convert any value with an operator<< stream inserter to text.
// Local helper that also covers types std::to_string cannot handle
// (e.g. cv::Point2f, which only provides a stream inserter).
template<class T>
static std::string to_string(const T& value)
{
    std::ostringstream ost;
    ost << value;
    // The former ost.flush() was dropped: flushing an ostringstream is
    // a no-op, because it writes straight into its in-memory buffer.
    return ost.str();
}
// Millisecond difference between two boost::xtime timestamps.
// Each timestamp is flattened to whole milliseconds on its own before
// subtracting, so per-timestamp integer truncation matches the writer's
// expectations.
unsigned long get_ms_diff(boost::xtime& start, boost::xtime& end)
{
    const boost::xtime::xtime_sec_t from_ms = start.sec * 1000 + start.nsec / 1000000;
    const boost::xtime::xtime_sec_t to_ms = end.sec * 1000 + end.nsec / 1000000;
    return static_cast<unsigned long>(to_ms - from_ms);
}
// Cross product of vectors (i->j) x (i->k): its sign tells which side
// of the directed line i->j the point k lies on; zero means collinear.
static float direct(cv::Point2f i, cv::Point2f j, cv::Point2f k)
{
    const float ij_x = j.x - i.x;
    const float ij_y = j.y - i.y;
    const float ik_x = k.x - i.x;
    const float ik_y = k.y - i.y;
    return ik_x * ij_y - ij_x * ik_y;
}
// Precondition: c is known to be collinear with segment ab. Reports
// whether c falls inside the segment's axis-aligned bounding box,
// i.e. actually lies on the segment (1) or outside it (0).
static int onsegment(cv::Point2f a, cv::Point2f b, cv::Point2f c)
{
    const float lo_x = min(a.x, b.x);
    const float hi_x = max(a.x, b.x);
    const float lo_y = min(a.y, b.y);
    const float hi_y = max(a.y, b.y);
    if (c.x < lo_x || c.x > hi_x)
        return 0;
    if (c.y < lo_y || c.y > hi_y)
        return 0;
    return 1;
}
// Segment intersection test between p1-q1 and p2-q2 via orientation
// signs (the classic CLRS cross-product method).
static bool intersection(cv::Point2f p1, cv::Point2f q1, cv::Point2f p2, cv::Point2f q2)
{
    const float d1 = direct(p2, q2, p1);
    const float d2 = direct(p2, q2, q1);
    const float d3 = direct(p1, q1, p2);
    const float d4 = direct(p1, q1, q2);
    // All four orientations zero: segments are fully collinear — this
    // detector deliberately reports no crossing for that degenerate case.
    if (d1 == 0 && d2 == 0 && d3 == 0 && d4 == 0)
        return false;
    // Proper crossing: each segment straddles the other's supporting line.
    if (d1 * d2 < 0 && d3 * d4 < 0)
        return true;
    // Touching cases: one endpoint lies exactly on the other segment.
    if (d1 == 0 && onsegment(p2, q2, p1))
        return true;
    if (d2 == 0 && onsegment(p2, q2, q1))
        return true;
    if (d3 == 0 && onsegment(p1, q1, p2))
        return true;
    if (d4 == 0 && onsegment(p1, q1, q2))
        return true;
    return false;
}
//检测面积
static bool detection_area(std::vector<cv::Point2f> scene_corners, int objRows, int objCols, double scale)
{
double sceneArea = contourArea(scene_corners, true);
double objectArea = objCols * objRows;
if (sceneArea > scale * objectArea || sceneArea < objectArea / scale)
return false;
return true;
}
// Build per-template SIFT data (size, keypoints, descriptors) for every
// readable image in `files`. Unreadable images and images yielding no
// keypoints are silently skipped.
static std::vector<sift> detection_compute(std::list<std::string> files)
{
    std::vector<sift> sifts;
    if (files.empty())
        return sifts;
    // Hoisted out of the loop: the old code constructed a fresh SIFT
    // detector and extractor for every image, which is pure overhead —
    // they carry no per-image state.
    cv::Ptr<cv::xfeatures2d::SiftFeatureDetector> detector = cv::xfeatures2d::SiftFeatureDetector::create();
    cv::Ptr<cv::xfeatures2d::SiftDescriptorExtractor> descriptor = cv::xfeatures2d::SiftDescriptorExtractor::create();
    for (std::list<std::string>::const_iterator itor = files.begin(); itor != files.end(); ++itor)
    {
        cv::Mat img = cv::imread(*itor, 0); // 0 = load as grayscale
        if (img.empty())
            continue;
        sift object;
        object.rows = img.rows;
        object.cols = img.cols;
        // Detect keypoints, then compute their descriptors (feature vectors).
        detector->detect(img, object.keypoints);
        if (object.keypoints.empty())
            continue;
        descriptor->compute(img, object.keypoints, object.descriptors);
        sifts.push_back(object);
    }
    return sifts;
}
// Compute SIFT keypoints and descriptors for a single video frame.
// An empty frame, or a frame with no detectable keypoints, yields a
// default-constructed (empty) sift record.
static sift detection_compute(cv::Mat frame)
{
    sift scene;
    if (frame.empty())
        return scene;
    // SIFT works on intensity only — convert the BGR frame to grayscale.
    cv::Mat gray;
    cv::cvtColor(frame, gray, CV_BGR2GRAY);
    cv::Ptr<cv::xfeatures2d::SiftFeatureDetector> finder = cv::xfeatures2d::SiftFeatureDetector::create();
    finder->detect(gray, scene.keypoints);
    if (!scene.keypoints.empty())
    {
        // Compute descriptors (feature vectors) for the found keypoints.
        cv::Ptr<cv::xfeatures2d::SiftDescriptorExtractor> extractor = cv::xfeatures2d::SiftDescriptorExtractor::create();
        extractor->compute(gray, scene.keypoints, scene.descriptors);
    }
    return scene;
}
// Match object descriptors against scene descriptors using FLANN
// (approximate nearest-neighbour search). Empty descriptors on either
// side produce an empty match list.
static std::vector<cv::DMatch> match_features(cv::Mat object, cv::Mat scene)
{
    std::vector<cv::DMatch> matches;
    if (object.empty() || scene.empty())
        return matches;
    cv::FlannBasedMatcher matcher;
    matcher.match(object, scene, matches, cv::Mat()); // empty mask = use all
    return matches;
}
// Keep only the strongest matches: those whose descriptor distance is
// below 0.3 * the maximum distance observed in this match set.
//
// Bug fix: the original looped with `i < object.rows` while indexing
// `matches[i]`. The matcher returns at most one match per query row, so
// whenever it returns fewer than object.rows entries the old loop read
// past the end of `matches` (undefined behaviour). Iterate the match
// list itself. (The unused `min_dist` computation was also dropped.)
static std::vector<cv::DMatch> filter_features(cv::Mat object, std::vector<cv::DMatch> matches)
{
    std::vector<cv::DMatch> good_matches;
    if (matches.empty())
        return good_matches;
    // Find the largest descriptor distance among the matches.
    double max_dist = 0;
    for (size_t i = 0; i < matches.size(); i++)
    {
        const double dist = matches[i].distance;
        if (dist > max_dist) max_dist = dist;
    }
    // Keep match pairs closer than 0.3 * max_dist.
    const double threshold = 0.3 * max_dist;
    for (size_t i = 0; i < matches.size(); i++)
    {
        if (matches[i].distance < threshold)
            good_matches.push_back(matches[i]);
    }
    return good_matches;
}
// Project the template's four corners into the scene through a RANSAC
// homography estimated from the filtered matches. Returns four (0,0)
// corners when there are not enough good matches to estimate one.
//
// Bug fix: the original guarded on good_matches.size() but then built
// the point correspondences from the UNFILTERED `matches` list, so the
// distance filtering in filter_features had no effect on the estimated
// homography. Use good_matches — the set the guard actually checks.
static std::vector<cv::Point2f> homography_transform(sift object, sift scene)
{
    std::vector<cv::Point2f> scene_corners(4);
    // findHomography needs at least 4 point correspondences.
    if (object.good_matches.size() < 4)
        return scene_corners;
    // Collect corresponding keypoint coordinates from the good matches.
    std::vector<cv::Point2f> obj;
    std::vector<cv::Point2f> sce;
    for (unsigned int i = 0; i < object.good_matches.size(); i++)
    {
        obj.push_back(object.keypoints[object.good_matches[i].queryIdx].pt);
        sce.push_back(scene.keypoints[object.good_matches[i].trainIdx].pt);
    }
    cv::Mat H = findHomography(obj, sce, CV_RANSAC); // RANSAC rejects remaining outliers
    // Template corners, clockwise from top-left (cvPoint replaced by the
    // non-deprecated cv::Point2f).
    std::vector<cv::Point2f> obj_corners(4);
    obj_corners[0] = cv::Point2f(0, 0);
    obj_corners[1] = cv::Point2f((float)object.cols, 0);
    obj_corners[2] = cv::Point2f((float)object.cols, (float)object.rows);
    obj_corners[3] = cv::Point2f(0, (float)object.rows);
    // Map the template corners into scene coordinates.
    perspectiveTransform(obj_corners, scene_corners, H);
    return scene_corners;
}
// Draw the detected quadrilateral onto the scene image by connecting
// consecutive corners, closing the loop back to corner 0.
static void draw_surround_frame(std::vector<cv::Point2f> scene_corners, cv::Mat& scene)
{
    const cv::Scalar color(255, 0, 123);
    for (int i = 0; i < 4; i++)
    {
        line(scene, scene_corners[i], scene_corners[(i + 1) % 4], color, 3);
    }
}
// Worker-thread entry point: match every template object against one
// video frame and append each accepted detection to the location file.
// `index` is the frame number used to tag the output record.
static void threads_process(int index, cv::Mat frame)
{
// Private copy of the global template list so this thread can store its
// own matches/good_matches per element without racing other workers.
// NOTE(review): cv::Mat members inside `sift` still share their pixel /
// descriptor buffers after the vector copy (shallow Mat copies) — they
// appear to be used read-only here, but verify.
std::vector<sift> objects = sift_objects;
sift sence = detection_compute(frame);
int areaIndex;
std::vector<sift>::iterator itor;
for (areaIndex = 0, itor = objects.begin(); itor != objects.end(); itor++, areaIndex++)
{
// Match this template's descriptors against the frame, then keep only
// the strongest matches and project the template corners into the frame.
itor->matches = match_features(itor->descriptors, sence.descriptors);
itor->good_matches = filter_features(itor->descriptors, itor->matches);
std::vector<cv::Point2f> scene_corners;
scene_corners = homography_transform(*itor, sence);
// Diagonal intersection check: a plausible detection quadrilateral has
// crossing diagonals (also rejects the all-(0,0) "no homography" case).
if (!intersection(scene_corners[0], scene_corners[2], scene_corners[1], scene_corners[3]))
continue;
// Area check, scale ratio 3.0: reject regions more than 3x larger or
// smaller than the template image.
if (!detection_area(scene_corners, itor->rows, itor->cols, 3.0))
continue;
if (!location_out.is_open())
return;
// Record format: S_AREA#$$#frameIndex#$$#objectIndex#$$#c0-c1-c2-c3
std::string result;
result += S_AREA;
result += "#$$#";
result += to_string(index);
result += "#$$#";
result += to_string(areaIndex);
result += "#$$#";
result += to_string(scene_corners[0]);
result += "-";
result += to_string(scene_corners[1]);
result += "-";
result += to_string(scene_corners[2]);
result += "-";
result += to_string(scene_corners[3]);
// write_file serializes output across worker threads.
write_file(result);
}
}
// Read a video starting at `frameindex`, detect each template image
// from `files` in roughly every 90ms of video time, and stream the
// detections to the "location" file. Frames are processed on a pool of
// `threadcount` workers, with at most 4 frames in flight.
void ReadVideo(const cv::String& filename, std::list<std::string> files, int threadcount, int frameindex)
{
    cv::VideoCapture capture(filename);
    if (!capture.isOpened())
        return;
    capture.set(CV_CAP_PROP_POS_FRAMES, frameindex);
    double fps = capture.get(CV_CAP_PROP_FPS);
    if (fps <= 0)
        fps = 25.0; // guard: some containers report 0 FPS — avoid div-by-zero in the throttle below
    sift_objects = detection_compute(files);
    pool tp(threadcount);
    location_out.open("location", std::ios::out | std::ios::trunc);
    // File header: video FPS and number of template objects.
    location_out << S_FPS << "#$$#" << fps << std::endl;
    location_out << S_OBJECT << "#$$#" << sift_objects.size() << std::endl;
    cv::Mat frame;
    int record_frame = 0;
    while (capture.read(frame))
    {
        if (frame.empty())
            break;
        if (sift_objects.empty())
            continue;
        // Throttle: only hand a frame to the pool once at least ~90ms of
        // video time has passed since the last scheduled frame.
        if ((++frameindex - record_frame) * (1000 / fps) < 90)
            continue;
        record_frame = frameindex;
        // Bug fix: schedule a deep copy. capture.read() reuses `frame`'s
        // pixel buffer and cv::Mat copies are shallow, so without
        // clone() the worker thread races against the next read() into
        // the very same buffer.
        tp.schedule(boost::bind(threads_process, frameindex, frame.clone()));
        // Back-pressure: cap the number of in-flight frames.
        while (tp.active() > 3)
            Sleep(10);
    }
    capture.release();
    tp.wait();       // drain remaining scheduled frames
    location_out.close();
}