Overview
AprilTag is a visual fiducial library widely used in AR, robotics, and camera calibration. Its markers resemble QR codes but are deliberately simpler to meet real-time requirements, so tags can be detected quickly and their relative positions computed.
Official site: https://april.eecs.umich.edu/software/apriltag.html
Source Code
The official AprilTag source is the C version, set up for Linux.
Download: https://download.csdn.net/download/playezio/16211339
P-Chao has reworked it into a C++ version, set up for Windows.
Download: https://download.csdn.net/download/playezio/16211363
Both are described in detail below.
Official C Source
- apriltag core files
- common utility files
- example files
Note that the README for this version only covers building on Linux. How do you build on Windows? See below:
1. Set up a VS2015 project structured as in the figure; common holds the common files from the original layout, and demo.cpp holds the contents of opencv_demo.
demo.cpp is shown below:
#include <iostream>
#include "opencv2/opencv.hpp"
extern "C" {
#include "apriltag.h"
#include "tag36h11.h"
#include "tag25h9.h"
#include "tag16h5.h"
#include "tagCircle21h7.h"
#include "tagCircle49h12.h"
#include "tagCustom48h12.h"
#include "tagStandard41h12.h"
#include "tagStandard52h13.h"
#include "common/getopt.h"
}
using namespace std;
using namespace cv;
int main(int argc, char *argv[])
{
getopt_t *getopt = getopt_create();
getopt_add_bool(getopt, 'h', "help", 0, "Show this help");
getopt_add_bool(getopt, 'd', "debug", 0, "Enable debugging output (slow)"); // turning debug mode off speeds things up
getopt_add_bool(getopt, 'q', "quiet", 0, "Reduce output");
getopt_add_string(getopt, 'f', "family", "tag36h11", "Tag family to use");
getopt_add_int(getopt, 't', "threads", "1", "Use this many CPU threads");
getopt_add_double(getopt, 'x', "decimate", "2.0", "Decimate input image by this factor");
getopt_add_double(getopt, 'b', "blur", "0.0", "Apply low-pass blur to input");
getopt_add_bool(getopt, '0', "refine-edges", 1, "Spend more time trying to align edges of tags");
if (!getopt_parse(getopt, argc, argv, 1) ||
getopt_get_bool(getopt, "help")) {
printf("Usage: %s [options]\n", argv[0]);
getopt_do_usage(getopt);
exit(0);
}
// Initialize camera
/*VideoCapture cap(0);
if (!cap.isOpened()) {
cerr << "Couldn't open video capture device" << endl;
return -1;
}*/
// read a video file for processing
cv::VideoCapture cap;
cap.open("E:\\experiment\\square.mp4");
// Initialize tag detector with options
apriltag_family_t *tf = NULL;
const char *famname = getopt_get_string(getopt, "family");
if (!strcmp(famname, "tag36h11")) {
tf = tag36h11_create();
}
else if (!strcmp(famname, "tag25h9")) {
tf = tag25h9_create();
}
else if (!strcmp(famname, "tag16h5")) {
tf = tag16h5_create();
}
else if (!strcmp(famname, "tagCircle21h7")) {
tf = tagCircle21h7_create();
}
else if (!strcmp(famname, "tagCircle49h12")) {
tf = tagCircle49h12_create();
}
else if (!strcmp(famname, "tagStandard41h12")) {
tf = tagStandard41h12_create();
}
else if (!strcmp(famname, "tagStandard52h13")) {
tf = tagStandard52h13_create();
}
else if (!strcmp(famname, "tagCustom48h12")) {
tf = tagCustom48h12_create();
}
else {
printf("Unrecognized tag family name. Use e.g. \"tag36h11\".\n");
exit(-1);
}
apriltag_detector_t *td = apriltag_detector_create();
apriltag_detector_add_family(td, tf);
td->quad_decimate = getopt_get_double(getopt, "decimate");
td->quad_sigma = getopt_get_double(getopt, "blur");
td->nthreads = getopt_get_int(getopt, "threads");
td->debug = getopt_get_bool(getopt, "debug");
td->refine_edges = getopt_get_bool(getopt, "refine-edges");
Mat frame, gray;
while (true) {
cap >> frame;
if (frame.empty()) // end of video
break;
int width = frame.cols;
int height = frame.rows;
resize(frame, frame, Size(width / 2, height / 2));
cvtColor(frame, gray, COLOR_BGR2GRAY);
// Make an image_u8_t header for the Mat data
//image_u8_t im = image_u8_t(gray.cols, gray.rows, gray.cols, gray.data);
image_u8_t im =
{gray.cols,
gray.rows,
gray.cols,
gray.data
};
zarray_t *detections = apriltag_detector_detect(td, &im);
cout << zarray_size(detections) << " tags detected" << endl;
// Draw detection outlines
for (int i = 0; i < zarray_size(detections); i++) {
apriltag_detection_t *det;
zarray_get(detections, i, &det);
line(frame, Point(det->p[0][0], det->p[0][1]),
Point(det->p[1][0], det->p[1][1]),
Scalar(0, 0xff, 0), 2);
line(frame, Point(det->p[0][0], det->p[0][1]),
Point(det->p[3][0], det->p[3][1]),
Scalar(0, 0, 0xff), 2);
line(frame, Point(det->p[1][0], det->p[1][1]),
Point(det->p[2][0], det->p[2][1]),
Scalar(0xff, 0, 0), 2);
line(frame, Point(det->p[2][0], det->p[2][1]),
Point(det->p[3][0], det->p[3][1]),
Scalar(0xff, 0, 0), 2);
stringstream ss;
ss << det->id;
String text = ss.str();
int fontface = FONT_HERSHEY_SCRIPT_SIMPLEX;
double fontscale = 1.0;
int baseline;
Size textsize = getTextSize(text, fontface, fontscale, 2,
&baseline);
putText(frame, text, Point(det->c[0] - textsize.width / 2,
det->c[1] + textsize.height / 2),
fontface, fontscale, Scalar(0xff, 0x99, 0), 2);
}
apriltag_detections_destroy(detections);
imshow("Tag Detections", frame);
if (waitKey(30) >= 0)
break;
}
apriltag_detector_destroy(td);
if (!strcmp(famname, "tag36h11")) {
tag36h11_destroy(tf);
}
else if (!strcmp(famname, "tag25h9")) {
tag25h9_destroy(tf);
}
else if (!strcmp(famname, "tag16h5")) {
tag16h5_destroy(tf);
}
else if (!strcmp(famname, "tagCircle21h7")) {
tagCircle21h7_destroy(tf);
}
else if (!strcmp(famname, "tagCircle49h12")) {
tagCircle49h12_destroy(tf);
}
else if (!strcmp(famname, "tagStandard41h12")) {
tagStandard41h12_destroy(tf);
}
else if (!strcmp(famname, "tagStandard52h13")) {
tagStandard52h13_destroy(tf);
}
else if (!strcmp(famname, "tagCustom48h12")) {
tagCustom48h12_destroy(tf);
}
getopt_destroy(getopt);
return 0;
}
2. Only the OpenCV environment needs to be configured; version 4.0.1 is confirmed to work.
3. The source must be changed in the following place (the upstream demo initializes image_u8_t with C99 designated initializers, which the VS2015 C++ compiler rejects; use positional aggregate initialization instead):
image_u8_t im =
{gray.cols,
gray.rows,
gray.cols,
gray.data
};
- pthread installation
Project → Manage NuGet Packages → search for pthread → Install
- Error C2011 "timespec": "struct" type redefinition
In pthread.h, below
#if !defined( PTHREAD_H )
#define PTHREAD_H
add
#define HAVE_STRUCT_TIMESPEC
- error LNK2019: unresolved external symbol __imp__timeGetTime@0 referenced in a function
The error is caused by not linking the static library the function needs. There are two ways to link it:
1) Add #pragma comment(lib, "winmm.lib") below the header includes to preload the static library.
2) In the VS project: Properties → Linker → Input → Additional Dependencies, add winmm.lib.
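Equivalently, both fixes can be applied at the top of demo.cpp without editing pthread.h; a minimal sketch, assuming the NuGet pthread package is installed:

#define HAVE_STRUCT_TIMESPEC        // define before including pthread.h to avoid C2011: 'timespec' redefinition
#pragma comment(lib, "winmm.lib")   // preload the static library providing timeGetTime, fixing the LNK2019 error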
C++ Version Source
Set up a VS2015 project with the structure shown in the figure.
The main function is as follows:
#include "stdafx.h"
#define NOMINMAX
#include <cstdio>
#include <iostream>
#include "getopt.h"
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "TagDetector.h"
#include "DebugImage.h"
#define DEFAULT_TAG_FAMILY "Tag36h11"
typedef struct TagTestOptions {
TagTestOptions() :
show_debug_info(false),
show_timing(false),
show_results(false),
be_verbose(false),
no_images(false),
generate_output_files(false),
params(),
family_str(DEFAULT_TAG_FAMILY),
error_fraction(1) {
}
bool show_debug_info;
bool show_timing;
bool show_results;
bool be_verbose;
bool no_images;
bool generate_output_files;
TagDetectorParams params;
std::string family_str;
double error_fraction;
} TagTestOptions;
void print_usage(const char* tool_name, FILE* output = stderr) {
TagDetectorParams p;
fprintf(output, "\
Usage: %s [OPTIONS] IMAGE1 [IMAGE2 ...]\n\
Run a tool to test tag detection. Options:\n\
-h Show this help message.\n\
-d Show debug images and data during tag detection.\n\
-t Show timing information for tag detection.\n\
-R Show textual results of tag detection.\n\
-v Be verbose (includes -d -t -R).\n\
-x Do not generate any non-debug visuals.\n\
-o Generate debug visuals as output files vs. using X11.\n\
-D Use decimation for segmentation stage.\n\
-S SIGMA Set the original image sigma value (default %.2f).\n\
-s SEGSIGMA Set the segmentation sigma value (default %.2f).\n\
-a THETATHRESH Set the theta threshold for clustering (default %.1f).\n\
-m MAGTHRESH Set the magnitude threshold for clustering (default %.1f).\n\
-V VALUE Set adaptive threshold value for new quad algo (default %f).\n\
-N RADIUS Set adaptive threshold radius for new quad algo (default %d).\n\
-b Refine bad quads using template tracker.\n\
-r Refine all quads using template tracker.\n\
-n Use the new quad detection algorithm.\n\
-f FAMILY Look for the given tag family (default \"%s\")\n\
-e FRACTION Set error detection fraction (default 1)\n",
tool_name,
p.sigma,
p.segSigma,
p.thetaThresh,
p.magThresh,
p.adaptiveThresholdValue,
p.adaptiveThresholdRadius,
DEFAULT_TAG_FAMILY);
fprintf(output, "Known tag families:");
TagFamily::StringArray known = TagFamily::families();
for (size_t i = 0; i < known.size(); ++i) {
fprintf(output, " %s", known[i].c_str());
}
fprintf(output, "\n");
}
TagTestOptions parse_options(int argc, char** argv) {
TagTestOptions opts;
const char* options_str = "hdtRvxoDS:s:a:m:V:N:brnf:e:"; // 'e' takes an argument, so it needs a trailing colon
int c;
while ((c = getopt(argc, argv, options_str)) != -1) {
switch (c) {
// Reminder: add new options to 'options_str' above and print_usage()!
case 'h': print_usage(argv[0], stdout); exit(0); break;
case 'd': opts.show_debug_info = true; break;
case 't': opts.show_timing = true; break;
case 'R': opts.show_results = true; break;
case 'v': opts.be_verbose = true; break;
case 'x': opts.no_images = true; break;
case 'o': opts.generate_output_files = true; break;
case 'D': opts.params.segDecimate = true; break;
case 'S': opts.params.sigma = atof(optarg); break;
case 's': opts.params.segSigma = atof(optarg); break;
case 'a': opts.params.thetaThresh = atof(optarg); break;
case 'm': opts.params.magThresh = atof(optarg); break;
case 'V': opts.params.adaptiveThresholdValue = atof(optarg); break;
case 'N': opts.params.adaptiveThresholdRadius = atoi(optarg); break;
case 'b': opts.params.refineBad = true; break;
case 'r': opts.params.refineQuads = true; break;
case 'n': opts.params.newQuadAlgorithm = true; break;
case 'f': opts.family_str = optarg; break;
case 'e': opts.error_fraction = atof(optarg); break;
default:
fprintf(stderr, "\n");
print_usage(argv[0], stderr);
exit(1);
}
}
opts.params.adaptiveThresholdRadius += (opts.params.adaptiveThresholdRadius + 1) % 2;
if (opts.be_verbose) {
opts.show_debug_info = opts.show_timing = opts.show_results = true;
}
return opts;
}
int main(int argc, char** argv)
{
const std::string win = "Tag test";
TagTestOptions opts = parse_options(argc, argv);
TagFamily family(opts.family_str);
if (opts.error_fraction >= 0 && opts.error_fraction < 1)
{
family.setErrorRecoveryFraction(opts.error_fraction);
}
TagDetector detector(family, opts.params);
detector.debug = opts.show_debug_info;
detector.debugWindowName = opts.generate_output_files ? "" : win;
if (opts.params.segDecimate && opts.be_verbose) {
std::cout << "Will decimate for segmentation!\n";
}
TagDetectionArray detections;
//for (int i = optind; i<argc; ++i) {
cv::VideoCapture cap;
cap.open("E:\\experiment\\square.mp4");
cv::Mat src;
while (1)
{
//cv::Mat src = cv::imread(argv[i]);
cap >> src;
if (src.empty()) { break; } // end of video; avoid spinning forever on empty frames
while (std::max(src.rows, src.cols) > 800) {
cv::Mat tmp;
cv::resize(src, tmp, cv::Size(0, 0), 0.5, 0.5);
src = tmp;
}
cv::Point2d opticalCenter(0.5*src.cols, 0.5*src.rows); // (x, y) image center: cols is width, rows is height
if (!detector.debug && !opts.no_images) {
//labelAndWaitForKey(win, "Original", src, ScaleNone, true);
}
clock_t start = clock();
detector.process(src, opticalCenter, detections);
clock_t end = clock();
if (true)
{
if (opts.show_debug_info) std::cout << "\n";
std::cout << "Got " << detections.size() << " detections in "
<< double(end - start) / CLOCKS_PER_SEC << " seconds.\n";
for (size_t i = 0; i<detections.size(); ++i) {
const TagDetection& d = detections[i];
std::cout << " - Detection: id = " << d.id << ", "
<< "code = " << d.code << ", "
<< "rotation = " << d.rotation << "\n";
}
}
if (!opts.no_images) {
cv::Mat img = family.superimposeDetections(src, detections);
labelAndWaitForKey(win, "Detected", img, ScaleNone, true);
}
}
//}
if (opts.show_timing) detector.reportTimers();
return 0;
}
1. camtest, quadtest, and gltest must be removed: each contains its own main, which conflicts with this main and prevents the build.
2. The remaining errors are simple to resolve and are not enumerated here.
How It Works
The main steps of the AprilTag localization algorithm are:
1) Adaptive threshold segmentation
2) Contour finding, using union-find to extract connected components
3) Line fitting on the contours to find candidate convex quadrilaterals
4) Decoding the quadrilaterals to identify tags
5) Coordinate transformation into the world frame
Adaptive Threshold Segmentation
Gaussian filtering comes first:
// this amounts to a smoothing pre-filter
if (td->quad_sigma != 0) {
// compute a reasonable kernel width by figuring that the
// kernel should go out 2 std devs.
//
// max sigma ksz
// 0.499 1 (disabled)
// 0.999 3
// 1.499 5
// 1.999 7
float sigma = fabsf((float) td->quad_sigma);
int ksz = 4 * sigma; // 2 std devs in each direction
if ((ksz & 1) == 0)
ksz++;
if (ksz > 1) {
if (td->quad_sigma > 0) {
// Apply a blur
image_u8_gaussian_blur(quad_im, sigma, ksz);
} else {
// SHARPEN the image by subtracting the low frequency components.
image_u8_t *orig = image_u8_copy(quad_im);
image_u8_gaussian_blur(quad_im, sigma, ksz);
for (int y = 0; y < orig->height; y++) {
for (int x = 0; x < orig->width; x++) {
int vorig = orig->buf[y*orig->stride + x];
int vblur = quad_im->buf[y*quad_im->stride + x];
int v = 2*vorig - vblur;
if (v < 0)
v = 0;
if (v > 255)
v = 255;
quad_im->buf[y*quad_im->stride + x] = (uint8_t) v;
}
}
image_u8_destroy(orig);
}
}
}
Thresholding follows the Gaussian filtering.
The 2016 paper uses adaptive thresholding to segment the image, chiefly to cope with uneven illumination and dark scenes and so improve segmentation accuracy. The idea of adaptive thresholding is to choose a sensible threshold within each pixel's neighborhood; the neighborhood mean or median are both common choices.
On top of this basic but important step, the author applies a trick: the image is first divided into 4x4-pixel tiles and the maximum and minimum gray value of each tile are computed; a 3x3-tile min/max filter is then run over the per-tile extrema, and the mean of the filtered extrema, (max + min) / 2, becomes that tile's threshold, as sketched below.
Tiling is mainly for robustness: regional statistics are more stable than individual pixels and less disturbed by random noise, and it also improves computational efficiency.
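As an illustration, a minimal OpenCV sketch of that tiling trick, assuming an 8-bit grayscale input; the real implementation (threshold() in apriltag_quad_thresh.c) additionally handles image borders and marks low-contrast tiles as "skip" (value 127):

#include <opencv2/opencv.hpp>

cv::Mat tile_threshold(const cv::Mat& gray) {
    const int T = 4;                                  // tile size, as in the text
    int tw = gray.cols / T, th = gray.rows / T;
    cv::Mat tmin(th, tw, CV_8U), tmax(th, tw, CV_8U);

    // 1) per-tile gray-level min and max
    for (int ty = 0; ty < th; ty++)
        for (int tx = 0; tx < tw; tx++) {
            double mn, mx;
            cv::minMaxLoc(gray(cv::Rect(tx*T, ty*T, T, T)), &mn, &mx);
            tmin.at<uchar>(ty, tx) = (uchar)mn;
            tmax.at<uchar>(ty, tx) = (uchar)mx;
        }

    // 2) 3x3 min/max filtering over neighboring tiles (erode = min, dilate = max)
    cv::Mat tmin2, tmax2;
    cv::erode(tmin, tmin2, cv::Mat::ones(3, 3, CV_8U));
    cv::dilate(tmax, tmax2, cv::Mat::ones(3, 3, CV_8U));

    // 3) threshold each pixel against (max + min) / 2 of its tile
    cv::Mat bin(gray.size(), CV_8U, cv::Scalar(0));
    for (int y = 0; y < th*T; y++)
        for (int x = 0; x < tw*T; x++) {
            int lo = tmin2.at<uchar>(y/T, x/T), hi = tmax2.at<uchar>(y/T, x/T);
            bin.at<uchar>(y, x) = gray.at<uchar>(y, x) > (lo + hi) / 2 ? 255 : 0;
        }
    return bin;
}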
Contour Finding
Adaptive thresholding yields a binary image.
The next step is to find the contours that could make up a tag marker.
A simple way to find connected components is to trace the black/white edges in the binary image, but when two tags share an edge this easily makes the component extraction go wrong.
The author therefore uses the union-find algorithm to compute connected components, each of which carries a unique ID:
unionfind_t *uf = unionfind_create(w * h);
if (td->nthreads <= 1) {
for (int y = 0; y < h - 1; y++) {
//every set of pixels sharing the same parent forms one connected component
do_unionfind_line(uf, threshim, h, w, ts, y);
}
} else {
int sz = h - 1;
int chunksize = 1 + sz / (APRILTAG_TASKS_PER_THREAD_TARGET * td->nthreads);
#ifdef _MSC_VER
struct unionfind_task *tasks = (unionfind_task_t *)malloc((sz / chunksize + 1)*sizeof *tasks);
#else
struct unionfind_task tasks[sz / chunksize + 1];
#endif
int ntasks = 0;
for (int i = 0; i < sz; i += chunksize) {
// each task will process [y0, y1). Note that this attaches
// each cell to the right and down, so row y1 *is* potentially modified.
//
// for parallelization, make sure that each task doesn't touch rows
// used by another thread.
tasks[ntasks].y0 = i;
tasks[ntasks].y1 = imin(sz, i + chunksize - 1);
tasks[ntasks].h = h;
tasks[ntasks].w = w;
tasks[ntasks].s = ts;
tasks[ntasks].uf = uf;
tasks[ntasks].im = threshim;
workerpool_add_task(td->wp, do_unionfind_task, &tasks[ntasks]);
ntasks++;
}
workerpool_run(td->wp);
// XXX stitch together the different chunks.
for (int i = 0; i + 1 < ntasks; i++) {
do_unionfind_line(uf, threshim, h, w, ts, tasks[i].y1);
}
}
timeprofile_stamp(td->tp, "unionfind");
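For reference, a minimal union-find with path compression, sketching what unionfind_create / unionfind_get_representative provide (the library version in common/unionfind.h also tracks component sizes for merging):

#include <cstdint>
#include <vector>

struct UnionFind {
    std::vector<uint32_t> parent;
    explicit UnionFind(uint32_t n) : parent(n) {
        for (uint32_t i = 0; i < n; i++) parent[i] = i;  // each pixel starts as its own root
    }
    uint32_t find(uint32_t a) {                          // representative, with path compression
        while (parent[a] != a) {
            parent[a] = parent[parent[a]];
            a = parent[a];
        }
        return a;
    }
    void unite(uint32_t a, uint32_t b) { parent[find(a)] = find(b); }
};

// do_unionfind_line works row by row in this spirit: for each pixel (x, y) that is
// not 127, unite(y*w + x, ...) with same-valued neighbors above and to the left.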
Clustering the Border Regions
//compute border points; each distinct border gets its own id, derived from the two adjacent union-find components
for (int y = 1; y < h-1; y++) {
for (int x = 1; x < w-1; x++) {
uint8_t v0 = threshim->buf[y*ts + x];
if (v0 == 127)
continue;
// XXX don't query this until we know we need it?
//each element's index encodes the pixel's position in the image
uint64_t rep0 = unionfind_get_representative(uf, y*w + x);
// whenever we find two adjacent pixels such that one is
// white and the other black, we add the point half-way (for precision)
// between them to a cluster associated with the unique
// ids of the white and black regions.
//
// We additionally compute the gradient direction (i.e., which
// direction was the white pixel?) Note: if (v1-v0) == 255, then
// (dx,dy) points towards the white pixel. if (v1-v0) == -255, then
// (dx,dy) points towards the black pixel. p.gx and p.gy will thus
// be -255, 0, or 255.
//
// Note that any given pixel might be added to multiple
// different clusters. But in the common case, a given
// pixel will be added multiple times to the same cluster,
// which increases the size of the cluster and thus the
// computational costs.
//
// A possible optimization would be to combine entries
// within the same cluster.
#define DO_CONN(dx, dy) \
if (1) { \
uint8_t v1 = threshim->buf[y*ts + dy*ts + x + dx]; \
\
if (v0 + v1 == 255) { \
uint64_t rep1 = unionfind_get_representative(uf, y*w + dy*w + x + dx); \
uint64_t clusterid; \
/* combine the roots of the two union-find regions into the border id */ \
if (rep0 < rep1) \
clusterid = (rep1 << 32) + rep0; \
else \
clusterid = (rep0 << 32) + rep1; \
\
/* XXX lousy hash function */ \
uint32_t clustermap_bucket = u64hash_2(clusterid) % nclustermap; \
struct uint64_zarray_entry *entry = clustermap[clustermap_bucket]; \
while (entry && entry->id != clusterid) { \
entry = entry->next; \
} \
\
if (!entry) { \
entry = (uint64_zarray_entry *)calloc(1, sizeof(struct uint64_zarray_entry)); \
entry->id = clusterid; \
entry->cluster = zarray_create(sizeof(struct pt)); \
entry->next = clustermap[clustermap_bucket]; \
clustermap[clustermap_bucket] = entry; \
} \
\
struct pt p; \
/* note: stores twice the edge coordinate (2*x, 2*y) for precision */ \
p.x = 2*x + dx; \
p.y = 2*y + dy; \
/* the gradient (gx, gy) points toward the white region */ \
p.gx = dx*((int) v1-v0); \
p.gy = dy*((int) v1-v0); \
zarray_add(entry->cluster, &p); \
} \
}
// do 4 connectivity. NB: Arguments must be [-1, 1] or we'll overflow .gx, .gy
DO_CONN(1, 0);
DO_CONN(0, 1);
// do 8 connectivity
DO_CONN(-1, 1);
DO_CONN(1, 1);
}
}
Each complete border region has its own id. Note that coordinates are stored at twice their actual value, so real coordinates are later recovered by multiplying by 0.5; the gradient at each point is computed as well.
Finding the Quadrilaterals
With the contours in hand, each one is partitioned to produce the minimum-residual convex quadrilateral as a candidate tag location.
The hardest part is finding the quad's four corners. For a regular square or rectangle this is relatively easy,
but when the tag is deformed or has undergone an affine/projective change it gets harder.
First the unordered contour points are sorted by their angle about the centroid, which is an excellent idea (sketched below).
With the points sorted, the algorithm proceeds in order, fitting lines to points within a window around each index and iterating the index while computing each fitted line's error sum; a low-pass filter over the error sums makes the system more robust, and the corner indices of the four lines with the largest error sums are selected as the quad's corners.
The points between corner indices are then used to fit lines, and the intersections of the four lines become the tag's vertices. (This is the most time-consuming part, essentially exhaustive: although it is only line fitting, the number of fits is very large, and this is the part most worth optimizing.)
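A sketch of that first sorting step, assuming a simple point type (the library performs the sort inside fit_quad; atan2 is used here for clarity):

#include <algorithm>
#include <cmath>
#include <vector>

struct Pt { float x, y; };

void sort_by_angle(std::vector<Pt>& pts) {
    if (pts.empty()) return;
    float cx = 0, cy = 0;
    for (const Pt& p : pts) { cx += p.x; cy += p.y; }
    cx /= pts.size(); cy /= pts.size();                  // centroid of the contour
    std::sort(pts.begin(), pts.end(), [&](const Pt& a, const Pt& b) {
        // compare angles of (p - centroid); atan2 returns values in (-pi, pi]
        return std::atan2(a.y - cy, a.x - cx) < std::atan2(b.y - cy, b.x - cx);
    });
}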
Next comes the line fit stage, in the function:
static void do_quad_task(void *p)
For each border cluster, this function performs a line fit:
for (int cidx = task->cidx0; cidx < task->cidx1; cidx++) {
zarray_t *cluster;
zarray_get(clusters, cidx, &cluster);
//skip borders with fewer points than the minimum required
if (zarray_size(cluster) < td->qtp.min_cluster_pixels)
continue;
// a cluster should contain only boundary points around the
// tag. it cannot be bigger than the whole screen. (Reject
// large connected blobs that will be prohibitively slow to
// fit quads to.) A typical point along an edge is added three
// times (because it has 3 neighbors). The maximum perimeter
// is 2w+2h.
//skip if there are more border points than any plausible tag boundary
if (zarray_size(cluster) > 3*(2*w+2*h)) {
continue;
}
struct quad quad;
memset(&quad, 0, sizeof(struct quad));
if (fit_quad(td, task->im, cluster, &quad)) {
pthread_mutex_lock(&td->mutex);
zarray_add(quads, &quad);
pthread_mutex_unlock(&td->mutex);
}
}
The core function is:
int fit_quad(apriltag_detector_t *td, image_u8_t *im, zarray_t *cluster, struct quad *quad)
It uses several tests to decide whether a border forms a quadrilateral: it first searches for candidate corners, then takes the intersections of lines fitted between the candidate corners as the quad's vertices, drawing on gradients, image moments, PCA, angle constraints, and the like. Each accepted quad is stored via the fit_quad call shown above, which yields the candidate quads; decoding is then used to drive the false positives down further.
To obtain corner coordinates with higher precision, the true gradient-edge lines must be located as closely as possible. Points are therefore sampled along each fitted line, and for each sample the point with the largest gradient along the line's normal is searched for and used as an input to the final line fit (the true gradient edge in the image).
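A sketch of that refinement idea, using a hypothetical helper on a grayscale cv::Mat; the library's own version is refine_edges() in apriltag.c:

#include <cmath>
#include <opencv2/opencv.hpp>

// Returns the offset along the unit normal (nx, ny) at sample point (px, py)
// where the directional intensity gradient is strongest.
double refine_point(const cv::Mat& gray, double px, double py,
                    double nx, double ny, int range = 2) {
    double bestOffset = 0, bestGrad = -1;
    for (int s = -range; s <= range; s++) {
        double x = px + s * nx, y = py + s * ny;
        // central difference of intensity across the normal direction
        int x0 = (int)(x - nx + 0.5), y0 = (int)(y - ny + 0.5);
        int x1 = (int)(x + nx + 0.5), y1 = (int)(y + ny + 0.5);
        if (x0 < 0 || y0 < 0 || x1 < 0 || y1 < 0 ||
            x0 >= gray.cols || x1 >= gray.cols || y0 >= gray.rows || y1 >= gray.rows)
            continue;
        double g = std::abs((double)gray.at<uchar>(y1, x1) - gray.at<uchar>(y0, x0));
        if (g > bestGrad) { bestGrad = g; bestOffset = s; }
    }
    return bestOffset;   // sample point shifted by this offset feeds the final line fit
}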
Among the many quadrilaterals, obviously abnormal ones are then filtered out using AprilTag's actual geometric properties: the side lengths must not differ too much in ratio, the angle between adjacent sides must not be too far from 90 degrees, and so on. These are routine outlier checks. The resulting candidate codes are shown in Figure 6 (an image whose result differs somewhat from the original algorithm was chosen, to show the differences between algorithm variants).
Homography
The detected quadrilateral has almost always undergone a projective transform (of which affine is a special case); a regular square is rare. The image therefore has to be warped into the theoretically ideal square, which is where the homography comes in: first for decoding, and second for pose estimation.
The underlying math is simple (see Multiple View Geometry, 2nd edition): it is a coordinate transform between corresponding points. Every corresponding point pair satisfies Equation (2-1): the ideal point coordinates equal the transform matrix times the actual image point coordinates.
Expanding Equation (2-1) gives Equation (2-2).
Eliminating the scale factor s and treating the entries of H as a column vector yields Equation (2-3); doesn't AX = 0 look familiar?
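In standard notation, the three relations are as follows (a reconstruction consistent with the surrounding text; (x, y) is the image point, (x', y') the ideal tag-frame point):

\[ \text{(2-1)}\quad s\begin{pmatrix} x' \\ y' \\ 1 \end{pmatrix} = H\begin{pmatrix} x \\ y \\ 1 \end{pmatrix},\qquad H = \begin{pmatrix} h_{11} & h_{12} & h_{13} \\ h_{21} & h_{22} & h_{23} \\ h_{31} & h_{32} & h_{33} \end{pmatrix} \]
\[ \text{(2-2)}\quad x' = \frac{h_{11}x + h_{12}y + h_{13}}{h_{31}x + h_{32}y + h_{33}},\qquad y' = \frac{h_{21}x + h_{22}y + h_{23}}{h_{31}x + h_{32}y + h_{33}} \]
\[ \text{(2-3)}\quad \begin{pmatrix} x & y & 1 & 0 & 0 & 0 & -x'x & -x'y & -x' \\ 0 & 0 & 0 & x & y & 1 & -y'x & -y'y & -y' \end{pmatrix} h = 0,\qquad h = (h_{11},\dots,h_{33})^{\top} \]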
Because of the scale factor, the last element of H can be normalized to 1, leaving H with 8 degrees of freedom. Solving Equation (2-3) therefore requires at least 4 point pairs, and the direct linear transform solves the system to give the homography H.
1) Through this transform, every point inside the quadrilateral maps to a unique coordinate in the ideal rectangle (mind the scale factor), after which decoding can proceed according to the coding rules.
2) Given the camera intrinsics and the homography H between world coordinates and the image, the camera extrinsics, i.e. the AprilTag's pose in the world frame, can be solved for.
----------------------- An alternative explanation -----------------------
Define the homography as the mapping H from the homogeneous coordinates of the four corner points in the tag frame to the coordinates of the four tag corners in the image; H can be obtained by the Direct Linear Transform (DLT).
Computing the tag's position and attitude requires additional information: the camera's focal length and the tag's physical size. The 3x3 homography can be written as the product of the 3x4 camera intrinsics matrix P and a 4x3 extrinsics matrix E. The extrinsics matrix is normally 4x4, but the four corner points in the tag frame have z = 0, so the third column of the extrinsics matrix can be removed; writing the truncated extrinsics with rotation entries Rij and translation entries Tk, the homography can be expressed as:
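A reconstruction of the referenced expression (notation as in the text, with P the 3x4 intrinsics and E the truncated extrinsics):

\[ H = s\,P\,E = s\begin{pmatrix} f_x & 0 & 0 & 0 \\ 0 & f_y & 0 & 0 \\ 0 & 0 & 1 & 0 \end{pmatrix}\begin{pmatrix} R_{00} & R_{01} & T_x \\ R_{10} & R_{11} & T_y \\ R_{20} & R_{21} & T_z \\ 0 & 0 & 1 \end{pmatrix} \]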
Because the homography maps between two sets of homogeneous coordinates, a scale parameter s is present. Expanding the expression above gives:
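Element-wise, the product works out to:

\[ H = \begin{pmatrix} s f_x R_{00} & s f_x R_{01} & s f_x T_x \\ s f_y R_{10} & s f_y R_{11} & s f_y T_y \\ s R_{20} & s R_{21} & s T_z \end{pmatrix} \]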
Since a rotation matrix is orthonormal, its column vectors have unit norm, and this constraint lets us compute s: its magnitude is the mean of the norms of the two column vectors of R, and its sign must place the tag in front of the camera, i.e. s must make the sign of Tz negative. The rotation matrix obtained through DLT is not necessarily orthonormal, so a polar decomposition is finally applied to R, as sketched below.
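A sketch of that final orthonormalization via SVD (polar projection onto the nearest rotation; R0 is the approximate, non-orthonormal rotation recovered from H, as a CV_64F matrix):

#include <opencv2/opencv.hpp>

cv::Mat nearest_rotation(const cv::Mat& R0) {
    cv::Mat w, U, Vt;
    cv::SVD::compute(R0, w, U, Vt);
    cv::Mat R = U * Vt;                  // nearest orthonormal matrix (det may be -1)
    if (cv::determinant(R) < 0) {        // fix an improper rotation (reflection)
        cv::Mat D = cv::Mat::eye(3, 3, R0.type());
        D.at<double>(2, 2) = -1;
        R = U * D * Vt;
    }
    return R;
}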
Decoding
The encoding/decoding scheme was devised by the AprilTag team and is specific to the system, so there is little room to change it and it is not explained in detail here; consult the AprilTag coding materials for a deeper treatment. By recognizing only a fixed tag family, interference from other families is further excluded, yielding the final result.
During decoding, the coordinates of each cell of the tag are first mapped onto the image plane through the homography, and the pixel value at the mapped point is compared against a threshold: above the threshold the cell is read as 1, below it as 0. To make the threshold illumination-invariant, the following threshold model is defined:
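The model is the four-parameter, spatially varying intensity model (shown here in standard form, consistent with the "4 parameters" mentioned below):

\[ I(x, y) = Ax + Bxy + Cy + D \]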
The threshold model varies with the image coordinates, so it is robust to different illumination intensities.
The model above has 4 parameters and is trained on points along the tag border, whose pixel values are known in advance: one class belongs to the bright region, the other to the dark region. A separate model is fitted to each of the two regions, and the threshold at a given point is the average of the two models' predictions there.
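A least-squares sketch of fitting one of the two models; the helper and its inputs are hypothetical, with samples taken from border cells whose class (bright or dark) is known:

#include <opencv2/opencv.hpp>
#include <vector>

cv::Vec4d fit_intensity_model(const std::vector<cv::Point2d>& pts,
                              const std::vector<double>& intensities) {
    cv::Mat A((int)pts.size(), 4, CV_64F), b((int)pts.size(), 1, CV_64F);
    for (int i = 0; i < (int)pts.size(); i++) {
        A.at<double>(i, 0) = pts[i].x;                 // A * x
        A.at<double>(i, 1) = pts[i].x * pts[i].y;      // B * x * y
        A.at<double>(i, 2) = pts[i].y;                 // C * y
        A.at<double>(i, 3) = 1.0;                      // D
        b.at<double>(i, 0) = intensities[i];
    }
    cv::Mat coeffs;
    cv::solve(A, b, coeffs, cv::DECOMP_SVD);           // least-squares solution of A*c = b
    return cv::Vec4d(coeffs.at<double>(0), coeffs.at<double>(1),
                     coeffs.at<double>(2), coeffs.at<double>(3));
}

// The per-cell threshold is then the mean of the white-model and black-model
// predictions at that cell's mapped image coordinates.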
Outputting the 3-D Pose
#include <iostream>
#include "opencv2/opencv.hpp"
extern "C" {
#include "apriltag_pose.h"
#include "apriltag.h"
#include "tag36h11.h"
#include "tag25h9.h"
#include "tag16h5.h"
#include "tagCircle21h7.h"
#include "tagCircle49h12.h"
#include "tagCustom48h12.h"
#include "tagStandard41h12.h"
#include "tagStandard52h13.h"
#include "common/getopt.h"
}
using namespace std;
using namespace cv;
int main(int argc, char *argv[])
{
getopt_t *getopt = getopt_create();
getopt_add_bool(getopt, 'h', "help", 0, "Show this help");
getopt_add_bool(getopt, 'd', "debug", 0, "Enable debugging output (slow)"); // turning debug mode off speeds things up
getopt_add_bool(getopt, 'q', "quiet", 0, "Reduce output");
getopt_add_string(getopt, 'f', "family", "tag36h11", "Tag family to use");
getopt_add_int(getopt, 't', "threads", "1", "Use this many CPU threads");
getopt_add_double(getopt, 'x', "decimate", "2.0", "Decimate input image by this factor");
getopt_add_double(getopt, 'b', "blur", "0.0", "Apply low-pass blur to input");
getopt_add_bool(getopt, '0', "refine-edges", 1, "Spend more time trying to align edges of tags");
if (!getopt_parse(getopt, argc, argv, 1) ||
getopt_get_bool(getopt, "help")) {
printf("Usage: %s [options]\n", argv[0]);
getopt_do_usage(getopt);
exit(0);
}
/*********** enter the calibrated camera parameters ***********/
//the camera (here a gimbal pod) must be calibrated first to get accurate parameters
apriltag_detection_info_t info; // camera calibration parameters: put the four calibrated values here
info.tagsize = 1; //physical edge length of the tag; the pose translation is reported in the same units
info.fx = 2458.40834; //fx, fy: focal lengths of the camera
info.fy = 2534.00612;
info.cx = 958.731363; //(cx, cy): optical center of the camera (principal point)
info.cy = 539.785126;
// Initialize camera
/*VideoCapture cap(0);
if (!cap.isOpened()) {
cerr << "Couldn't open video capture device" << endl;
return -1;
}*/
//read a video file for processing
cv::VideoCapture cap;
cap.open("E:\\experiment\\square.mp4");
// Initialize tag detector with options
apriltag_family_t *tf = NULL;
const char *famname = getopt_get_string(getopt, "family");
if (!strcmp(famname, "tag36h11")) {
tf = tag36h11_create();
}
else if (!strcmp(famname, "tag25h9")) {
tf = tag25h9_create();
}
else if (!strcmp(famname, "tag16h5")) {
tf = tag16h5_create();
}
else if (!strcmp(famname, "tagCircle21h7")) {
tf = tagCircle21h7_create();
}
else if (!strcmp(famname, "tagCircle49h12")) {
tf = tagCircle49h12_create();
}
else if (!strcmp(famname, "tagStandard41h12")) {
tf = tagStandard41h12_create();
}
else if (!strcmp(famname, "tagStandard52h13")) {
tf = tagStandard52h13_create();
}
else if (!strcmp(famname, "tagCustom48h12")) {
tf = tagCustom48h12_create();
}
else {
printf("Unrecognized tag family name. Use e.g. \"tag36h11\".\n");
exit(-1);
}
apriltag_detector_t *td = apriltag_detector_create(); //the detector
apriltag_detector_add_family(td, tf);
td->quad_decimate = getopt_get_double(getopt, "decimate");
td->quad_sigma = getopt_get_double(getopt, "blur");
td->nthreads = getopt_get_int(getopt, "threads");
td->debug = getopt_get_bool(getopt, "debug");
td->refine_edges = getopt_get_bool(getopt, "refine-edges");
Mat frame, gray;
while (true) {
cap >> frame;
if (frame.empty()) // end of video
break;
int width = frame.cols;
int height = frame.rows;
resize(frame, frame, Size(width / 2, height / 2));
cvtColor(frame, gray, COLOR_BGR2GRAY);
// Make an image_u8_t header for the Mat data
//image_u8_t im = image_u8_t(gray.cols, gray.rows, gray.cols, gray.data);
image_u8_t im =
{gray.cols,
gray.rows,
gray.cols,
gray.data
};
zarray_t *detections = apriltag_detector_detect(td, &im);
cout << zarray_size(detections) << " tags detected" << endl;
double local[2][2]; //2-D arrays holding tag corner positions (room for the first two detections only)
double locar[2][2];
// Draw detection outlines
for (int i = 0; i < zarray_size(detections); i++) {
apriltag_detection_t *det;
zarray_get(detections, i, &det); // get the detection det (corner positions and id)
line(frame, Point(det->p[0][0], det->p[0][1]),
Point(det->p[1][0], det->p[1][1]),
Scalar(0, 0xff, 0), 2);
line(frame, Point(det->p[0][0], det->p[0][1]),
Point(det->p[3][0], det->p[3][1]),
Scalar(0, 0, 0xff), 2);
line(frame, Point(det->p[1][0], det->p[1][1]),
Point(det->p[2][0], det->p[2][1]),
Scalar(0xff, 0, 0), 2);
line(frame, Point(det->p[2][0], det->p[2][1]),
Point(det->p[3][0], det->p[3][1]),
Scalar(0xff, 0, 0), 2);
cout << "左上角:" << "x= " << det->p[3][0] << " " << "y= " << det->p[3][1] << endl;
local[i][0] = det->p[3][0];
local[i][1] = det->p[3][1];
locar[i][0] = det->p[2][0];
locar[i][1] = det->p[2][1];
/********* add the pose estimation *********/
info.det = det; //det-->info
apriltag_pose_t pose;
estimate_pose_for_tag_homography(&info, &pose);
/************ output the 3-D position ************/
cout << "3-D position: " << "x= " << pose.t->data[0] << " " << "y= " << pose.t->data[1] << " " << "z= " << pose.t->data[2] << endl; //translation output
stringstream ss;
ss << det->id;
String text = ss.str();
int fontface = FONT_HERSHEY_SCRIPT_SIMPLEX;
double fontscale = 1.0;
int baseline;
//draw the id number at the center of the quad
Size textsize = getTextSize(text, fontface, fontscale, 2,
&baseline);
putText(frame, text, Point(det->c[0] - textsize.width / 2,
det->c[1] + textsize.height / 2),
fontface, fontscale, Scalar(0xff, 0x99, 0), 2);
}
apriltag_detections_destroy(detections);
imshow("Tag Detections", frame);
if (waitKey(30) >= 0)
break;
}
apriltag_detector_destroy(td);
if (!strcmp(famname, "tag36h11")) {
tag36h11_destroy(tf);
}
else if (!strcmp(famname, "tag25h9")) {
tag25h9_destroy(tf);
}
else if (!strcmp(famname, "tag16h5")) {
tag16h5_destroy(tf);
}
else if (!strcmp(famname, "tagCircle21h7")) {
tagCircle21h7_destroy(tf);
}
else if (!strcmp(famname, "tagCircle49h12")) {
tagCircle49h12_destroy(tf);
}
else if (!strcmp(famname, "tagStandard41h12")) {
tagStandard41h12_destroy(tf);
}
else if (!strcmp(famname, "tagStandard52h13")) {
tagStandard52h13_destroy(tf);
}
else if (!strcmp(famname, "tagCustom48h12")) {
tagCustom48h12_destroy(tf);
}
getopt_destroy(getopt);
return 0;
}
Calibrate with OpenCV to obtain fx, fy, cx, cy and fill them in here:
apriltag_detection_info_t info; // camera calibration parameters: put the four calibrated values here
info.tagsize = 1; //physical edge length of the tag
info.fx = 2458.40834; //fx, fy: focal lengths of the camera
info.fy = 2534.00612;
info.cx = 958.731363; //(cx, cy): optical center of the camera (principal point)
info.cy = 539.785126;
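A minimal calibration sketch with cv::calibrateCamera, assuming a 9x6 chessboard, 25 mm squares, and hypothetical image file names; the resulting K matrix supplies the four values above:

#include <iostream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>

int main() {
    const cv::Size boardSize(9, 6);      // inner corners per row/column (assumption)
    const float squareSize = 25.0f;      // mm; only scales the extrinsics
    std::vector<std::vector<cv::Point3f>> objectPoints;
    std::vector<std::vector<cv::Point2f>> imagePoints;

    // One set of 3-D corner coordinates on the planar board, z = 0.
    std::vector<cv::Point3f> corners3d;
    for (int y = 0; y < boardSize.height; y++)
        for (int x = 0; x < boardSize.width; x++)
            corners3d.emplace_back(x * squareSize, y * squareSize, 0.0f);

    cv::Size imageSize;
    for (int i = 1; i <= 20; i++) {      // hypothetical file names calib_1.jpg ... calib_20.jpg
        cv::Mat img = cv::imread("calib_" + std::to_string(i) + ".jpg", cv::IMREAD_GRAYSCALE);
        if (img.empty()) continue;
        imageSize = img.size();
        std::vector<cv::Point2f> corners;
        if (cv::findChessboardCorners(img, boardSize, corners)) {
            cv::cornerSubPix(img, corners, cv::Size(11, 11), cv::Size(-1, -1),
                cv::TermCriteria(cv::TermCriteria::EPS + cv::TermCriteria::COUNT, 30, 0.01));
            imagePoints.push_back(corners);
            objectPoints.push_back(corners3d);
        }
    }
    if (imagePoints.size() < 3) { std::cerr << "not enough views" << std::endl; return 1; }

    cv::Mat K, distCoeffs;
    std::vector<cv::Mat> rvecs, tvecs;
    cv::calibrateCamera(objectPoints, imagePoints, imageSize, K, distCoeffs, rvecs, tvecs);

    // K = [fx 0 cx; 0 fy cy; 0 0 1] -- these four values go into apriltag_detection_info_t.
    std::cout << "fx = " << K.at<double>(0, 0) << ", fy = " << K.at<double>(1, 1)
              << ", cx = " << K.at<double>(0, 2) << ", cy = " << K.at<double>(1, 2) << std::endl;
    return 0;
}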