kmeans做为无监督学习的一种聚类方法,原理非常简单,本质上是根据重心(密度中心)不断进行迭代的一个分割方法。
其主要步骤为:
(1)初始化k个中心点
(2)计算所有点到中心点的欧氏距离,形成集合dist
(3)找到dist最小值所在的索引i,将点加入第i个簇
(4)重新计算簇的所有中心,重复2-3直到中心点不变或者达到最大迭代次数。
具体的数学原理可以参考知乎
show the codes
语言:C++
依赖库:PCL1.9.1
//Kmeans.h
#pragma once
#include <iostream>
#include <algorithm>
#include<pcl/point_cloud.h>
#include<pcl/io/pcd_io.h>
#include <pcl/common/centroid.h>
class KMeans
{
private:
unsigned int max_iteration_;
const unsigned int cluster_num_;//k
double pointsDist(const pcl::PointXYZ& p1, const pcl::PointXYZ& p2);
public:
pcl::PointCloud<pcl::PointXYZ>::Ptr centre_points_;
//KMeans() = default;
KMeans(unsigned int k, unsigned int max_iteration);
void kMeans(const pcl::PointCloud<pcl::PointXYZ>::Ptr& cloud, std::vector<pcl::PointCloud<pcl::PointXYZ>> &cluster_cloud1);
~KMeans(){}
};
//Kmeans.cpp
#include "KMeans.h"
double KMeans::pointsDist(const pcl::PointXYZ& p1, const pcl::PointXYZ& p2)
{
//std::cerr << p1.x<<std::endl;
return std::sqrt((p1.x - p2.x) * (p1.x - p2.x) + (p1.y - p2.y) * (p1.y - p2.y) + (p1.z - p2.z) * (p1.z - p2.z));
}
KMeans::KMeans(unsigned int k, unsigned int max_iteration):cluster_num_(k),max_iteration_(max_iteration),centre_points_(new pcl::PointCloud<pcl::PointXYZ>)
{
}
void KMeans::kMeans(const pcl::PointCloud<pcl::PointXYZ>::Ptr& cloud, std::vector<pcl::PointCloud<pcl::PointXYZ>> &cluster_cloud1)
{
if (!cloud->empty()&&!centre_points_->empty())
{
unsigned int iterations = 0;
double sum_diff = 0.2;
std::vector<pcl::PointCloud<pcl::PointXYZ>>cluster_cloud;
while (!(iterations>=max_iteration_||sum_diff<=0.05))//如果大于迭代次数或者两次重心之差小于0.05就停止
//while ((iterations<= max_iteration_ || sum_diff >= 0.1))
{
sum_diff = 0;
std::vector<int> points_processed(cloud->points.size(), 0);
cluster_cloud.clear();
cluster_cloud.resize(cluster_num_);
for (size_t i = 0; i < cloud->points.size(); ++i)
{
if (!points_processed[i])
{
std::vector<double>dists(0, 0);
for (size_t j = 0; j < cluster_num_; ++j)
{
dists.emplace_back(pointsDist(cloud->points[i], centre_points_->points[j]));
}
std::vector<double>::const_iterator min_dist = std::min_element(dists.cbegin(), dists.cend());
unsigned int it = std::distance(dists.cbegin(), min_dist);//获取最小值所在的序号或者位置(从0开始)
//unsigned int it=std::distance(std::cbegin(dists), min_dist);
cluster_cloud[it].points.push_back(cloud->points[i]);//放进最小距离所在的簇
points_processed[i] = 1;
}
else
continue;
}
//重新计算簇重心
pcl::PointCloud<pcl::PointXYZ> new_centre;
for (size_t k = 0; k < cluster_num_; ++k)
{
Eigen::Vector4f centroid;
pcl::PointXYZ centre;
pcl::compute3DCentroid(cluster_cloud[k], centroid);
centre.x = centroid[0];
centre.y = centroid[1];
centre.z = centroid[2];
//centre_points_->clear();
//centre_points_->points.push_back(centre);
new_centre.points.push_back(centre);
}
//计算重心变化量
for (size_t s = 0; s < cluster_num_; ++s)
{
std::cerr << " centre" << centre_points_->points[s] << std::endl;
std::cerr << "new centre" << new_centre.points[s] << std::endl;
sum_diff += pointsDist(new_centre.points[s], centre_points_->points[s]);
}
std::cerr << sum_diff << std::endl;
centre_points_->points.clear();
*centre_points_ = new_centre;
++iterations;
}
std::cerr << cluster_cloud[0].size() << std::endl;
std::cerr << cluster_cloud[1].size() << std::endl;
cluster_cloud1.assign(cluster_cloud.cbegin(),cluster_cloud.cend());//复制点云向量
}
}
//main.cpp
#include"KMeans.h"
int main()
{
KMeans test(2, 10);
//test.centre_points_->points.push_back(pcl::PointXYZ(16.3,-6.35,11.4));
// test.centre_points_->points.push_back(pcl::PointXYZ(15.6, -5.13, 7.49));
//设置初始值
test.centre_points_->points.push_back(pcl::PointXYZ(15.88, -5.5, 11.12));
test.centre_points_->points.push_back(pcl::PointXYZ(15.69, -4.98, 11.05));
pcl::PointCloud<pcl::PointXYZ>::Ptr cloud(new pcl::PointCloud<pcl::PointXYZ>);
std::vector<pcl::PointCloud<pcl::PointXYZ>> output_cloud;
pcl::io::loadPCDFile("2.pcd",*cloud);
std::cerr << "raw cloud size" << cloud->points.size() << std::endl;
test.kMeans(cloud, output_cloud);
for (int i = 0; i < 2; ++i)
{
//pcl::PointCloud<pcl::PointXYZ> cloud1;
//cloud1 = output_cloud[i];
std::cerr <<"output_cloud[i].points.size()"<< output_cloud[i].points.size()<<std::endl;
output_cloud[i].width = output_cloud[i].points.size();
output_cloud[i].height = 1;
output_cloud[i].resize(output_cloud[i].width * output_cloud[i].height);
pcl::io::savePCDFile( "kmeans"+std::to_string(i) + ".pcd", output_cloud[i]);
//pcl::io::savePCDFile()
}
std::cout << "Hello World!\n";
}
实验结果
总结
1.必须已知聚类对象个数,并有合适的初始值
2.对于杆状点云效果不好,面状可以还好
3.kmean本质上还是依赖相似性,和所有依赖欧式距离的分割方法一样(欧式聚类,DBSCAN),点云的距离分布对最终的分割至关重要,当然可以换成别的距离或者度量方式可能效果会好点。