FASText(上) : Fast 角点

最新推荐文章于 2023-10-16 22:00:41 发布

PeaceInMind

最新推荐文章于 2023-10-16 22:00:41 发布

阅读量5.7k

点赞数

分类专栏：图像文字检测与识别文章标签：图像角点 CV

本文链接：https://blog.csdn.net/peaceinmind/article/details/51910948

版权

图像同时被 2 个专栏收录

24 篇文章 1 订阅

订阅专栏

文字检测与识别

10 篇文章 41 订阅

订阅专栏

0导语

本文主要介绍论文FASText:Efficient Unconstrained Scene Text Detector[pdf][code],其核心思想是定制化fast角点使其更有利于场景文字的检测。因此本文主要分上下两篇，第一篇先介绍fast角点，并剖析其opencv（2.4.10）的代码，下篇再介绍FASText.

首先我们直观感受下FASText和Fast角点，参数阈值为12，FASText用的是FASText12，fast的是TYPE_7_12。并且根据论文的数据，它的proposal比MSER要少一半，但是检出率recall要高25%，并且速度还是4倍以上，全面优于MSER。

FASText(3013个角点)

Fast(7924个角点)

1 Fast特征点

1.1角点候选

Fast角点的思想很简单，很早就已经出来了[2],但是请注意opencv(2.4.10)中NMS的做法跟论文中有些差别,后面我们会看到。

Fast中对特征点的定义是这样的：

如图3，以当前点（绿色点）为中心，某个半径r画圆。考查圆周上的点（红色），假设它们一共有k个。如果红色的点中有连续不少于n个点的像素值都大于当前点像素值加上阈值t（或者都小于当前点像素值减去阈值t）,那么当前坐标就会被认为是角点的候选。opencv中k和n有3种配置，默认的是k = 16,n = 9,还有k=12, n=7和k=8,n=5这两种配置

上面的例子中如果我们假设k=16,n=9,t=10的话，那么我们就可以找到这么一组点（黄色），其满足上面的所有条件

1)在圆周上且是连续的

2)点的个数10,超过9

3)它们的像素值都低于中心点t以上

所以该点就是潜在角点。从原理上看，fast角点不具备scale不变性，但基本具有旋转不变和平移不变性。

1.2 NMS

再次强调论文和opencv采用的是不同的思路

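第二步也是最后一步就是普通的NMS，但是这里面比较麻烦的是怎么去评价一个点的好坏，论文中用的是

$$V = \max\left(\sum_{x \in G}\bigl(|I_x - I_p| - t\bigr),\ \sum_{x \in L}\bigl(|I_p - I_x| - t\bigr)\right)$$

其中 $I_p$ 是中心点的像素值，$I_x$ 是圆周上点 $x$ 的像素值，

$$G = \{x \mid I_x \ge I_p + t\},\qquad L = \{x \mid I_x \le I_p - t\}$$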

意思就是先挑选出圆周上所有比中心点大阈值t以上的点集G和比它小阈值t以上的点集L

G = {}, L = {138,135,82,96,109,71,121,117,140,117,74,110,137,141}

求绝对差并减掉t

G = {}, L ={5,8,61,47,34,72,22,26,3,26,69,33,6,2};

求和

G = 0, L = 414

取两者的最大值414作为分数

但是opencv代码中用的是另外一种思路。类似于木桶原理，它选的是符合条件的点集中与中心点相差最小的像素点，并用它们之间的绝对差作为这个角点的分数。上面的例子中，黄色的点与中心点相差最小的是140，因此分数是13.

本文就讲到这，由于本人水平有限，错误与纰漏还请指正。附上opencv的代码，去掉了优化部分，以方便阅读。

<pre name="code" class="cpp">#include <opencv/cv.hpp>
#include <vector>

namespace TestFastKeyPoint
{
	using namespace cv;
	// Builds the table of linear offsets, relative to the centre pixel, of the
	// samples on an 8-, 12- or 16-pixel FAST ring.
	//   pixel       - output table; all 25 entries are written
	//   rowStride   - factor applied to the second (row) component of each offset
	//   patternSize - ring size; CV_Assert fails unless it is 8, 12 or 16
	// Entries at index >= patternSize repeat the ring from its start, so a caller
	// can walk an arc that crosses the end of the ring without wrapping the index.
	void makeOffsets(int pixel[25], int rowStride, int patternSize)
	{
		// Each entry is { column offset, row offset }.
		static const int ring16[][2] =
		{
			{ 0, 3 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 3, 0 }, { 3, -1 }, { 2, -2 }, { 1, -3 },
			{ 0, -3 }, { -1, -3 }, { -2, -2 }, { -3, -1 }, { -3, 0 }, { -3, 1 }, { -2, 2 }, { -1, 3 }
		};

		static const int ring12[][2] =
		{
			{ 0, 2 }, { 1, 2 }, { 2, 1 }, { 2, 0 }, { 2, -1 }, { 1, -2 },
			{ 0, -2 }, { -1, -2 }, { -2, -1 }, { -2, 0 }, { -2, 1 }, { -1, 2 }
		};

		static const int ring8[][2] =
		{
			{ 0, 1 }, { 1, 1 }, { 1, 0 }, { 1, -1 },
			{ 0, -1 }, { -1, -1 }, { -1, 0 }, { -1, 1 }
		};

		// Stays null for an unsupported ring size, which the assertion below rejects.
		const int(*offsets)[2] = 0;
		switch (patternSize)
		{
		case 16:
			offsets = ring16;
			break;
		case 12:
			offsets = ring12;
			break;
		case 8:
			offsets = ring8;
			break;
		default:
			break;
		}

		CV_Assert(pixel && offsets);

		for (int idx = 0; idx < 25; ++idx)
		{
			if (idx < patternSize)
				pixel[idx] = offsets[idx][0] + offsets[idx][1] * rowStride;
			else
				pixel[idx] = pixel[idx - patternSize];	// wrap-around copy of an earlier entry
		}
	}

	// Corner score for a FAST ring of patternSize samples.
	//   ptr       - pointer to the centre pixel
	//   pixel     - offset table as filled by makeOffsets
	//   threshold - detection threshold, used as the lower bound of the search
	// Only declared here; the explicit specialisations for 12, 8 and 16 that
	// follow provide the implementations.
	template<int patternSize>
	int cornerScore(const uchar* ptr, const int pixel[], int threshold);

	// Corner score for the 12-pixel ring (arcs of 7 consecutive samples).
	// With diff = centre - sample, the result is
	//   max(threshold, best "darker" arc margin, best "brighter" arc margin) - 1
	// where an arc's margin is the smallest absolute difference inside the arc.
	template<>
	int cornerScore<12>(const uchar* ptr, const int pixel[], int threshold)
	{
		const int half = 6;
		const int samples = half * 3 + 1;
		const int centre = ptr[0];

		short diff[samples];
		for (int i = 0; i < samples; ++i)
			diff[i] = (short)(centre - ptr[pixel[i]]);

		// Ring darker than the centre: maximise the minimum of diff over each arc.
		int darkBest = threshold;
		for (int start = 0; start < 12; start += 2)
		{
			int lowest = std::min((int)diff[start + 1], (int)diff[start + 2]);
			if (lowest > darkBest)	// otherwise this arc cannot improve the bound
			{
				for (int i = 3; i <= 6; ++i)
					lowest = std::min(lowest, (int)diff[start + i]);
				// The shared middle part is extended by one sample on either side,
				// giving the two 7-sample arcs that begin at start and start + 1.
				darkBest = std::max(darkBest, std::min(lowest, (int)diff[start]));
				darkBest = std::max(darkBest, std::min(lowest, (int)diff[start + 7]));
			}
		}

		// Ring brighter than the centre: minimise the maximum of diff (negative values).
		int brightBest = -darkBest;
		for (int start = 0; start < 12; start += 2)
		{
			int highest = (int)diff[start + 1];
			for (int i = 2; i <= 4; ++i)
				highest = std::max(highest, (int)diff[start + i]);
			if (highest < brightBest)
			{
				highest = std::max(highest, (int)diff[start + 5]);
				highest = std::max(highest, (int)diff[start + 6]);
				brightBest = std::min(brightBest, std::max(highest, (int)diff[start]));
				brightBest = std::min(brightBest, std::max(highest, (int)diff[start + 7]));
			}
		}

		return -brightBest - 1;
	}

	// Corner score for the 8-pixel ring (arcs of 5 consecutive samples).
	// With diff = centre - sample, the result is
	//   max(threshold, best "darker" arc margin, best "brighter" arc margin) - 1
	// where an arc's margin is the smallest absolute difference inside the arc.
	template<>
	int cornerScore<8>(const uchar* ptr, const int pixel[], int threshold)
	{
		const int half = 4;
		const int samples = half * 3 + 1;
		const int centre = ptr[0];

		short diff[samples];
		for (int i = 0; i < samples; ++i)
			diff[i] = (short)(centre - ptr[pixel[i]]);

		// Ring darker than the centre: maximise the minimum of diff over each arc.
		int darkBest = threshold;
		for (int start = 0; start < 8; start += 2)
		{
			int lowest = std::min((int)diff[start + 1], (int)diff[start + 2]);
			if (lowest > darkBest)	// otherwise this arc cannot improve the bound
			{
				lowest = std::min(lowest, (int)diff[start + 3]);
				lowest = std::min(lowest, (int)diff[start + 4]);
				// Two 5-sample arcs share this middle part: one begins at start,
				// the other at start + 1.
				darkBest = std::max(darkBest, std::min(lowest, (int)diff[start]));
				darkBest = std::max(darkBest, std::min(lowest, (int)diff[start + 5]));
			}
		}

		// Ring brighter than the centre: minimise the maximum of diff (negative values).
		int brightBest = -darkBest;
		for (int start = 0; start < 8; start += 2)
		{
			int highest = (int)diff[start + 1];
			highest = std::max(highest, (int)diff[start + 2]);
			highest = std::max(highest, (int)diff[start + 3]);
			if (highest < brightBest)
			{
				highest = std::max(highest, (int)diff[start + 4]);
				brightBest = std::min(brightBest, std::max(highest, (int)diff[start]));
				brightBest = std::min(brightBest, std::max(highest, (int)diff[start + 5]));
			}
		}

		return -brightBest - 1;
	}
	// Corner score for the 16-pixel ring (arcs of 9 consecutive samples).
	// With diff = centre - sample, the result is
	//   max(threshold, best "darker" arc margin, best "brighter" arc margin) - 1
	// where an arc's margin is the smallest absolute difference inside the arc.
	template<>
	int cornerScore<16>(const uchar* ptr, const int pixel[], int threshold)
	{
		const int half = 8;
		const int samples = half * 3 + 1;
		const int centre = ptr[0];

		// Signed difference between the centre and every ring sample.
		short diff[samples];
		for (int i = 0; i < samples; ++i)
			diff[i] = (short)(centre - ptr[pixel[i]]);

		// Positive differences (ring darker than the centre): maximise the
		// minimum of diff over each arc.
		int darkBest = threshold;
		for (int start = 0; start < 16; start += 2)
		{
			int lowest = (int)diff[start + 1];
			lowest = std::min(lowest, (int)diff[start + 2]);
			lowest = std::min(lowest, (int)diff[start + 3]);
			if (lowest > darkBest)	// otherwise this arc cannot improve the bound
			{
				for (int i = 4; i <= 8; ++i)
					lowest = std::min(lowest, (int)diff[start + i]);
				// Two 9-sample arcs share this middle part: one begins at start,
				// the other at start + 1.
				darkBest = std::max(darkBest, std::min(lowest, (int)diff[start]));
				darkBest = std::max(darkBest, std::min(lowest, (int)diff[start + 9]));
			}
		}

		// Negative differences (ring brighter than the centre): minimise the
		// maximum of diff over each arc.
		int brightBest = -darkBest;
		for (int start = 0; start < 16; start += 2)
		{
			int highest = (int)diff[start + 1];
			for (int i = 2; i <= 5; ++i)
				highest = std::max(highest, (int)diff[start + i]);
			if (highest < brightBest)
			{
				for (int i = 6; i <= 8; ++i)
					highest = std::max(highest, (int)diff[start + i]);
				brightBest = std::min(brightBest, std::max(highest, (int)diff[start]));
				brightBest = std::min(brightBest, std::max(highest, (int)diff[start + 9]));
			}
		}

		return -brightBest - 1;
	}


	// FAST corner detection on a ring of patternSize samples (8, 12 or 16).
	//
	// A pixel is a corner when more than patternSize / 2 consecutive ring samples
	// are all darker than (centre - threshold) or all brighter than
	// (centre + threshold).
	//
	//   _img               - input image; each row is read as uchar samples, one
	//                        per column (presumably CV_8UC1 - not checked here)
	//   keypoints          - cleared, then filled with the detected corners
	//   threshold          - intensity difference threshold, clamped to [0, 255]
	//   nonmax_suppression - when true, a corner is kept only if its score is
	//                        strictly greater than all 8 neighbouring scores
	//
	// Rows and columns closer than 3 pixels to the border are never tested,
	// whatever the ring size.
	template<int patternSize>
	void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression)
	{
		Mat img = _img.getMat();
		// K: half of the ring; a corner needs more than K consecutive samples.
		// N: number of offset entries to scan so that arcs crossing the end of the
		//    ring are also seen (makeOffsets repeats the ring after patternSize).
		const int K = patternSize / 2, N = patternSize + K + 1;
		int i, j, k, pixel[25];
		makeOffsets(pixel, (int)img.step, patternSize);

		keypoints.clear();

		threshold = std::min(std::max(threshold, 0), 255);
		// The difference of two pixels lies in [-255, 255], hence a 512-entry table
		// indexed by (sample - centre + 255):
		//   0 - the sample is within the threshold of the centre
		//   1 - the sample is darker than the centre by more than the threshold
		//   2 - the sample is brighter than the centre by more than the threshold
		uchar threshold_tab[512];
		for (i = -255; i <= 255; i++)
			threshold_tab[i + 255] = (uchar)(i < -threshold ? 1 : i > threshold ? 2 : 0);

		AutoBuffer<uchar> _buf((img.cols + 16) * 3 * (sizeof(int)+sizeof(uchar)) + 128);
		// buf: three rolling rows of corner scores. The score is bounded by a single
		// pixel difference (the weakest sample of the arc), so it never exceeds 255
		// and one byte per pixel is enough.
		uchar* buf[3];
		buf[0] = _buf; buf[1] = buf[0] + img.cols; buf[2] = buf[1] + img.cols;
		// cpbuf: three rolling lists of corner column indices; the extra slot at
		// index -1 of each list stores that row's corner count.
		int* cpbuf[3];
		cpbuf[0] = (int*)alignPtr(buf[2] + img.cols, sizeof(int)) + 1;
		cpbuf[1] = cpbuf[0] + img.cols + 1;
		cpbuf[2] = cpbuf[1] + img.cols + 1;
		memset(buf[0], 0, img.cols * 3);

		for (i = 3; i < img.rows - 2; i++)
		{
			const uchar* ptr = img.ptr<uchar>(i) +3;
			uchar* curr = buf[(i - 3) % 3];
			int* cornerpos = cpbuf[(i - 3) % 3];
			memset(curr, 0, img.cols);
			int ncorners = 0;

			// The last iteration (i == rows - 3) detects nothing; it only supplies an
			// all-zero "current" row so that the previous row can be suppressed.
			if (i < img.rows - 3)
			{
				j = 3;
				for (; j < img.cols - 3; j++, ptr++)
				{
					int v = ptr[0];
					const uchar* tab = &threshold_tab[0] - v + 255;

					// High-speed rejection test. Ring samples k and k + K are
					// diametrically opposite, and any arc of K + 1 consecutive
					// samples contains at least one sample of every such pair. So
					// bit 1 (darker) or bit 2 (brighter) can only survive in d if
					// each pair has a sample with that bit set.
					// The original hard-coded the pairs of the 16-pixel ring
					// ((0,8), (2,10), ...). For the 12-pixel ring those samples are
					// not opposite, and for the 8-pixel ring each sample was paired
					// with itself, so valid corners were rejected.
					int d = tab[ptr[pixel[0]]] | tab[ptr[pixel[K]]];

					if (d == 0)	// both samples are similar to the centre
						continue;

					for (k = 2; k < K; k += 2)
						d &= tab[ptr[pixel[k]]] | tab[ptr[pixel[k + K]]];

					if (d == 0)
						continue;

					for (k = 1; k < K; k += 2)
						d &= tab[ptr[pixel[k]]] | tab[ptr[pixel[k + K]]];

					// d == 3 means every pair has one darker and one brighter sample,
					// so no run of the same kind is longer than K and neither scan
					// below can succeed.
					if (d & 1)
					{
						int vt = v - threshold, count = 0;

						for (k = 0; k < N; k++)
						{
							int x = ptr[pixel[k]];
							if (x < vt)
							{
								if (++count > K)
								{
									cornerpos[ncorners++] = j;
									if (nonmax_suppression)
										curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold);
									break;
								}
							}
							else
								count = 0;	// reset: the darker samples must be consecutive
						}
					}

					if (d & 2)
					{
						int vt = v + threshold, count = 0;

						for (k = 0; k < N; k++)
						{
							int x = ptr[pixel[k]];
							if (x > vt)
							{
								if (++count > K)
								{
									cornerpos[ncorners++] = j;
									if (nonmax_suppression)
										curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold);
									break;
								}
							}
							else
								count = 0;	// reset: the brighter samples must be consecutive
						}
					}
				}
			}

			cornerpos[-1] = ncorners;

			// The first processed row has no previous row to finalise yet.
			if (i == 3)
				continue;

			const uchar* prev = buf[(i - 4 + 3) % 3];	// + 3 keeps the index non-negative
			const uchar* pprev = buf[(i - 5 + 3) % 3];
			cornerpos = cpbuf[(i - 4 + 3) % 3];
			ncorners = cornerpos[-1];

			// Decide which corners of the previous row (i - 1) survive suppression,
			// now that the scores of the rows above and below it are both known.
			for (k = 0; k < ncorners; k++)
			{
				j = cornerpos[k];
				int score = prev[j];
				if (!nonmax_suppression ||
					(score > prev[j + 1] && score > prev[j - 1] &&
					score > pprev[j - 1] && score > pprev[j] && score > pprev[j + 1] &&
					score > curr[j - 1] && score > curr[j] && score > curr[j + 1]))
				{
					keypoints.push_back(KeyPoint((float)j, (float)(i - 1), 7.f, -1, (float)score));
				}
			}
		}
	}

	// Runs the FAST_t instantiation selected by `type`. In each
	// FastFeatureDetector::TYPE_a_b constant, b is the ring size and a is half of
	// b plus one. Any other value of `type` calls nothing and leaves `keypoints`
	// untouched.
	void FASTX(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression, int type)
	{
		if (type == FastFeatureDetector::TYPE_5_8)
		{
			FAST_t<8>(_img, keypoints, threshold, nonmax_suppression);
		}
		else if (type == FastFeatureDetector::TYPE_7_12)
		{
			FAST_t<12>(_img, keypoints, threshold, nonmax_suppression);
		}
		else if (type == FastFeatureDetector::TYPE_9_16)
		{
			FAST_t<16>(_img, keypoints, threshold, nonmax_suppression);
		}
	}

	// Convenience entry point: FASTX with the 16-pixel ring (TYPE_9_16).
	void FAST(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression)
	{
		const int ringType = FastFeatureDetector::TYPE_9_16;
		FASTX(_img, keypoints, threshold, nonmax_suppression, ringType);
	}
}

参考文献

[1]Buta M. FASText: Efficientunconstrained scene text detector[C]//2015 IEEE International Conference onComputer Vision (ICCV). IEEE, 2015: 1206-1214.

[2]Rosten E, Drummond T.Machine learning for high-speed corner detection[C]//European conference oncomputer vision. Springer Berlin Heidelberg, 2006: 430-443.

PeaceInMind

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
6
评论
FASText(上) : Fast 角点

0 导语本文主要介绍论文FASText:Efficient Unconstrained Scene Text Detector[pdf][code],其核心思想是定制化fast角点使其更有利于场景文字的检测。因此本文主要分上下两篇，第一篇先介绍fast角点，并剖析其opencv（2.4.10）的代码，下篇再介绍FSAText.首先我们直观感受下FASText和Fast角点，参数
复制链接

扫一扫

专栏目录