MeanShift聚类算法

MeanShift算法是一个非参数聚类技术,它不要求预先知道聚类的类别个数,对聚类的形状也没有限制。


给定在d维空间上的n个数据点xi, i = 1, ..., n,由核函数K(x)和窗口半径h得到的多元核密度估计函数是:


对于径向对称核函数,它足以确定核函数的更新,核函数k(x)满足:


这里ck,d 是一个归一化的常量,它确保K(x)的积分为1。密度函数的模式位于梯度函数为0的地方,


密度函数的梯度估计(1)为:


其中       。第一项正比于在x处的估计,x是由计算得到的;第二项



是一个MeanShift。MeanShift向量总是指向密度增长最大的方向。MeanShift求解的步骤,是逐次迭代执行以下过程:

  • MeanShift向量 的计算
  • 平移窗口 

以上迭代过程会收敛于一个点,在该点处密度函数的梯度为0。找到MeanShift模式的过程如图1所示


The mean shift clustering algorithm is a practical application of the mode finding procedure

MeanShift聚类算法是一个实用的模式查找过程应用程序:

  • 为了找到密度函数的驻点,从数据点开始,运行MeanShift过程
  • 通过只保留局部最大值修剪这些点

收敛于相同模式的所有点构成的集合定义了模式的吸引域。处于相同吸引域的点聚成相同的一类,图二展示了基于三维数据点的MeanShift聚类的两个例子:

以上内容翻译自:http://homepages.inf.ed.ac.uk/rbf/CVonline/LOCAL_COPIES/TUZEL1/MeanShift.pdf


下面针对以上理论实现该算法,并编写测试用例。

//meanshift.h

#ifndef _MEANSHIFT_H_
#define _MEANSHIFT_H_

#include "common.h"

class MeanShift
{
public:
	typedef std::vector<VecPoint> VecVecPoint;

	static const float NEAREST_ZERO;
	static const float C;  //!高斯核函数中的常数

	MeanShift() : m_size(0), R(0.0f){}

	 /** \brief 设置输入点云
	   * \param[in]  pPntCloud 输入点云
	   */
	bool setInputCloud(const PointCloud<PointXYZ>::Ptr pPntCloud);


	 /** \brief 获取聚类后的点云  */
	VecVecPoint & getOutputCloud()
	{
		return vv_pnt;
	}

	/** \brief 设置K近邻算法的搜索半径
	   * \param[in]  radius 半径
	   */
	bool setKNNRadius(const float radius)
	{
		R = radius;
        return true;
	}

	 /** \brief 执行MeanShift聚类算法	   */
	bool process();

	 /** \brief 保存聚类后的点云文件
	   * \param[in] dir_name 保存的目录
	   * \param[in] prex_name 文件名前缀
	   */
	bool SaveFile(const char *dir_name, const char *prex_name);

private:
	size_t m_size;  //!要处理点的个数
	PointCloud<PointXYZ>::Ptr mp_pointcloud;  //!PCL形式的点云,主要是为了使用FLANN的KD树
	VecPoint mv_pntcld;  //!点云
	VecPoint mv_local_mode;  //!每个点的局部模式
	VecVecPoint vv_pnt;  //!聚类后的点云

	float R;  //!K近邻算法时对应的搜索半径

	 /** \brief 对每个点执行MeanShift
	   * \param[in]  in_pnt 输入的点
	   * \param[out] out_pnt 输出的点
	   */
	inline bool execMeanShiftEachPoint(const PointXYZ &in_pnt, Point &out_pnt);

	 /** \brief 将具有相同局部模式的点归为一类
	   * \param[in] v_localmode 各个点对应的局部模式
	   * \param[out] vv_pnt 归并后的点
	   */
	bool mergeSameLocalModePoint(const VecPoint &v_localmode, VecVecPoint &vv_pnt);

	inline float gauss(float x)
	{
		return C * sqrt(x) * exp(-0.5 * x);
	}

};

#endif

//meanshift.cpp

#include "meanshift.h"

const float MeanShift::NEAREST_ZERO = 0.01;
const float MeanShift::C = 2.0f;

bool MeanShift::setInputCloud(const PointCloud<PointXYZ>::Ptr pPntCloud)
{
	m_size = pPntCloud->points.size();
	mv_pntcld.resize(m_size);
	mv_local_mode.resize(m_size);
	
	mp_pointcloud = pPntCloud;

	for (size_t i = 0; i < m_size; ++i)
	{
		mv_pntcld[i].x = pPntCloud->points[i].x;
		mv_pntcld[i].y = pPntCloud->points[i].y;
		mv_pntcld[i].z = pPntCloud->points[i].z;
	}

	return true;
}

inline bool MeanShift::execMeanShiftEachPoint(const PointXYZ &in_pnt, Point &out_pnt)
{
	// Set up KDTree
	pcl::KdTreeFLANN<PointXYZ>::Ptr tree (new pcl::KdTreeFLANN<PointXYZ>);
	tree->setInputCloud (mp_pointcloud);

	// Main Loop
	PointXYZ pnt = in_pnt;

	while (1)
	{
		// Neighbors containers
		std::vector<int> k_indices;
		std::vector<float> k_distances;

		float sum_weigh = 0;
		float x = 0.0f, y = 0.0f, z = 0.0f;
		float dist_pnts = 0.0f;

		tree->radiusSearch (pnt, R, k_indices, k_distances);

		for (int i = 0, pnumber = k_indices.size(); i < pnumber; ++i)
		{
			size_t index = k_indices[i];
			PointXYZ &nbhd_pnt = mp_pointcloud->points[index];
			float sqr_dist = k_distances[i];
			float gauss_param = sqr_dist / (R * R);
			float w = gauss(gauss_param);

			x += nbhd_pnt.x * w;
			y += nbhd_pnt.y * w;
			z += nbhd_pnt.z * w;
			sum_weigh += w;
		}
		x = x / sum_weigh;
		y = y / sum_weigh;
		z = z / sum_weigh;

		float diff_x = x - pnt.x, diff_y = y - pnt.y, diff_z = z - pnt.z;

		dist_pnts = sqrt(diff_x * diff_x + diff_y * diff_y + diff_z * diff_z);

		if (dist_pnts <= 0.0001/*MeanShift::NEAREST_ZERO*/)
		{
			break;
		}
		pnt.x = x;
		pnt.y = y;
		pnt.z = z;
	};

	out_pnt.x = pnt.x;
	out_pnt.y = pnt.y;
	out_pnt.z = pnt.z;

	return true;
}

bool MeanShift::mergeSameLocalModePoint(const VecPoint &v_localmode, VecVecPoint &vv_pnt)
{
	assert(v_localmode.size() == m_size);

	std::vector<bool> v_iscluster(m_size, false);

	for (size_t i = 0; i < m_size; ++i)
	{
		for (size_t j = i + 1; j < m_size; ++j)
		{
			const Point & lmpnt1 = v_localmode[i];
			const Point & lmpnt2 = v_localmode[j];
			Point  pnt1(mp_pointcloud->points[i].x, mp_pointcloud->points[i].y, mp_pointcloud->points[i].z);
			Point  pnt2(mp_pointcloud->points[j].x, mp_pointcloud->points[j].y, mp_pointcloud->points[j].z);
			float dist = 0.0f;

			dist = getPointsDist(lmpnt1, lmpnt2);
			if (dist <= MeanShift::NEAREST_ZERO)
			{
				//两个点可近似重合
				VecPoint v_pnt;

				if ( !v_iscluster[i] &&  !v_iscluster[j])
				{
					v_iscluster[i] = true;
					v_pnt.push_back(pnt1);

					v_iscluster[j] = true;
					v_pnt.push_back(pnt2);

					vv_pnt.push_back(v_pnt);
				}
				else if ( v_iscluster[i] &&  !v_iscluster[j])
				{
					size_t clustered_count = vv_pnt.size();
					size_t m, n;
					for (m = 0; m < clustered_count; ++m)
					{
						size_t count = vv_pnt[m].size();
						for (n = 0; n < count; ++n)
						{
							if (pnt1 == vv_pnt[m][n])
							{
								vv_pnt[m].push_back(pnt2);
								v_iscluster[j] = true;
								goto LABEL1;
							}
						}  // for n
					}  // for m
LABEL1:
                    ;
				}  // else if
				else if ( !v_iscluster[i] &&  v_iscluster[j])
				{
					size_t clustered_count = vv_pnt.size();
					size_t m, n;
					for (m = 0; m < clustered_count; ++m)
					{
						size_t count = vv_pnt[m].size();
						for (n = 0; n < count; ++n)
						{
							if (pnt2 == vv_pnt[m][n])
							{
								vv_pnt[m].push_back(pnt1);
								v_iscluster[i] = true;
								goto LABEL2;
							}
						}  // for n
					}  // for m
LABEL2:
                    ;
				}
				else
				{
					//都已经聚类,就不做处理
				}
			}  //  if (dist <= NEAREST_ZERO)

		}  //  for j
	}  //  for i

	for (size_t i = 0; i < m_size; ++i)
	{
		if (!v_iscluster[i])
		{
			Point  pnt(mp_pointcloud->points[i].x, mp_pointcloud->points[i].y, mp_pointcloud->points[i].z);
			VecPoint v_pnt;

			v_iscluster[i] = true;
			v_pnt.push_back(pnt);
			vv_pnt.push_back(v_pnt);
		}
	}

	return true;
}

bool MeanShift::process()
{
	for (size_t i = 0; i < m_size; ++i)
	{
		const PointXYZ &pnt = mp_pointcloud->points[i];
		execMeanShiftEachPoint(pnt, mv_local_mode[i]);
	}

	mergeSameLocalModePoint(mv_local_mode, vv_pnt);

	return true;
}

bool MeanShift::SaveFile(const char *dir_name, const char *prex_name)
{
	size_t cluster_count = vv_pnt.size();

	for (size_t i = 0; i < cluster_count; ++i)
	{
		pcl::PointCloud<pcl::PointXYZ>::Ptr p_pnt_cloud(new pcl::PointCloud<pcl::PointXYZ> ());

		for (size_t j = 0, grp_pnt_count = vv_pnt[i].size(); j < grp_pnt_count; ++j)
		{
			pcl::PointXYZ pt;
			pt.x = vv_pnt[i][j].x;
			pt.y = vv_pnt[i][j].y;
			pt.z = vv_pnt[i][j].z;

			p_pnt_cloud->points.push_back(pt);
		}

		p_pnt_cloud->width = static_cast<size_t>(vv_pnt[i].size());
		p_pnt_cloud->height = 1;

		char newFileName[256] = {0};
		char indexStr[16] = {0};

		strcat(newFileName, dir_name);
		strcat(newFileName, "/");
		strcat(newFileName, prex_name);
		strcat(newFileName, "-");
		sprintf(indexStr, "%d", i + 1);
		strcat(newFileName, indexStr);
		strcat(newFileName, ".pcd");
		io::savePCDFileASCII(newFileName, *p_pnt_cloud);
	}

	return true;
}


测试程序:

                pcl::PointCloud<pcl::PointXYZ>::Ptr cld (new pcl::PointCloud<pcl::PointXYZ>);		
                int val = 10;
		//构造立方体  
		float cube_len = 1;  
		srand(time(NULL));
		for (float x = 0; x < cube_len; x += 0.1)  
		{  
			for (float y = 0; y < cube_len; y += 0.1)  
			{  
				for (float z = 0; z < cube_len; z += 0.1)  
				{  
					pcl::PointXYZ basic_point;    

					//沿着向量(2.5, 2.5, 2.5)平移  
					basic_point.x = (float)(rand() % val) / 10.0f + 2.5;    
					basic_point.y = (float)(rand() % val) / 10.0f + 2.5;    
					basic_point.z = (float)(rand() % val) / 10.0f + 2.5;    
					cld->points.push_back(basic_point);    
				}  
			}  
		}  

		for (float x = 0; x < cube_len; x += 0.1)  
		{  
			for (float y = 0; y < cube_len; y += 0.1)  
			{  
				for (float z = 0; z < cube_len; z += 0.1)  
				{  
					pcl::PointXYZ basic_point;    

					basic_point.x = (float)(rand() % val) / 10.0f - 2.5;    
					basic_point.y = (float)(rand() % val) / 10.0f - 2.5;    
					basic_point.z = (float)(rand() % val) / 10.0f - 2.5;    
					cld->points.push_back(basic_point);    
				}  
			}  
		}  

		cld->width = (int)cld->points.size();    
		cld->height = 1;  
		io::savePCDFileASCII("manual_build.pcd", *cld);

		MeanShift ms;
		ms.setInputCloud(cld);
		ms.setKNNRadius(0.7);
		ms.process();
		ms.SaveFile(".", "meanshift");
		system("pause");

显示manual_build.pcd文件中构造的数据为:

运行测试用例后生成了2个文件,对应2类,分别为meanshift-1.pcd和meanshift-2.pcd

同时显示这两个文件为:

尽管对于这个例子而言聚类比较成功,但是也有一些例子聚类效果不好,不仅如此,执行以上程序时时间复杂度过大,导致几千个点运行时间就受不鸟了,暂时还没想到如何改进。

  • 4
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值