k-means步骤
步骤1:随机设置K个中心点
步骤2:对每个点计算其到各中心点的距离,并将该点归入距离最近的中心点所对应的聚类簇集
步骤3:计算并得到每个聚类簇集的重心(点集的均值)
步骤4:计算每个簇集的重心与其原中心点的距离,并以重心更新中心点;如果其中最大的距离>设定阈值,重新进入步骤2;否则跳出
代码实现
/* k - means */
/**
 * @brief 2-D point with integer coordinates.
 *
 * The class owns no resources, so it follows the Rule of Zero: no
 * user-provided destructor or copy/move operations are declared, which keeps
 * the type trivially copyable and trivially destructible.
 */
class Point
{
public:
    /// Creates the point (0, 0) through the default member initializers.
    Point() = default;

    /// Creates the point (x, y).
    Point(int x, int y) : x(x), y(y) {}

    int x = 0;  ///< Horizontal coordinate.
    int y = 0;  ///< Vertical coordinate.
};
/**
 * @brief Returns a pseudo-random integer in the half-open range [min, max).
 *
 * The value is drawn from rand(); this function does not seed the generator
 * itself.
 *
 * @param min  Inclusive lower bound.
 * @param max  Exclusive upper bound.
 * @param seed Unused; kept only so existing call sites keep compiling.
 * @return A value v with min <= v < max, or min when the range is empty
 *         (max <= min).
 */
int randIntRange(int min, int max, time_t* seed = nullptr)
{
    (void)seed;  // intentionally unused

    // An empty range would make the modulus zero (division by zero) or
    // negative (results outside the requested range), so short-circuit it.
    if (max <= min)
    {
        return min;
    }
    return rand() % (max - min) + min;
}
/**
 * @brief Euclidean (straight-line) distance between two points.
 *
 * @param p1 First point.
 * @param p2 Second point.
 * @return sqrt((p2.x - p1.x)^2 + (p2.y - p1.y)^2), narrowed to float.
 */
float distanceEuclidean(Point p1, Point p2)
{
    // Widen to double before subtracting and squaring: doing this in int
    // overflows (undefined behaviour) once a coordinate delta exceeds ~46340.
    const double dx = static_cast<double>(p2.x) - static_cast<double>(p1.x);
    const double dy = static_cast<double>(p2.y) - static_cast<double>(p1.y);
    return static_cast<float>(std::sqrt(dx * dx + dy * dy));
}
void clusterMinDis(std::vector<Point>& src, int k, std::vector<Point>& center_pts, std::vector<std::vector<Point>>& dst)
{
// 设置为k个簇集
dst.resize(k);
// 遍历所有其他点
// 重置k个中心簇集
for (int i = 0; i < k; i++)
{
dst[i].clear();
}
for (int j = 0; j < src.size(); j++)
{
// 计算其他点到各个中心点距离,进入最近中心点的簇集中
int min_dis_center_idx = 0;
float min_dis = 9999;
for (int center_idx = 0; center_idx < k; center_idx++)
{
Point cur_center_pt = center_pts[center_idx];
float distance = distanceEuclidean(src[j], center_pts[center_idx]);
// 距离判断
if (distance < min_dis)
{
min_dis = distance;
min_dis_center_idx = center_idx;
}
}
// 放入中心簇集
dst[min_dis_center_idx].push_back(src[j]);
}
}
void kmeans(std::vector<Point>& src, int k, float min_dis_thresh, std::vector<Point>& center_pts, std::vector<std::vector<Point>>& dst)
{
int pt_num = src.size();
// 中心点数量 > 点集数量
if (k > pt_num) return;
// 初始化设置前k个点为中心点
center_pts.clear();
for (int i = 0; i < k; i++)
{
center_pts.push_back(src[i]);
}
float max_center_change_dist = 9999;
// 簇集中最大的中心点距离变化值 > 阈值:需要更新簇集中心
while (max_center_change_dist > min_dis_thresh)
{
max_center_change_dist = 0;
clusterMinDis(src, k, center_pts, dst);
// 计算所有簇集平均中心
std::vector<Point> mean_cluster_pt(k);
for (int i = 0; i < k; i++)
{
std::cout << "Cluster pt number:" << dst[i].size() << std::endl;
Point center(0,0);
// 簇集无元素,跳过
if (dst[i].size() == 0)
{
continue;
}
for (int j = 0; j < dst[i].size(); j++)
{
center.x += dst[i][j].x;
center.y += dst[i][j].y;
}
center.x /= dst[i].size();
center.y /= dst[i].size();
// 如果均值中心点距离原始中心点>设定值,更新中心点
float cur_center_change_dist = distanceEuclidean(center, center_pts[i]);
// 比较得到最大的中心点距离变化值
if (cur_center_change_dist > max_center_change_dist)
{
max_center_change_dist = cur_center_change_dist;
}
std::cout << "更新簇集中心" << std::endl;
std::cout << "中心点变化距离:"<< max_center_change_dist <<",阈值:"<< min_dis_thresh << std::endl;
std::cout << center.x << "," << center.y << std::endl;
center_pts[i] = center;
}
}
}
/**
 * @brief Prints the clustering result and draws it onto @p res.
 *
 * For each cluster i, the centre centerPoints[i] is printed and drawn as a
 * radius-6 circle with thickness 1, then every member point is printed and
 * drawn as a radius-4 circle with thickness -1, all in colorList[i]. Finally
 * the image is shown in a window titled "K-means" and cv::waitKey(0) is
 * called.
 *
 * @param res           Image to draw on.
 * @param centerPoints  Cluster centres, indexed like @p clusterPoints.
 * @param clusterPoints Points grouped by cluster.
 * @param colorList     One colour per cluster, indexed like @p clusterPoints.
 */
void showResult(cv::Mat& res, std::vector<Point>& centerPoints, std::vector<std::vector<Point>>& clusterPoints, std::vector<cv::Scalar>& colorList)
{
    const char* const separator = "=======================";

    std::cout << separator << std::endl;
    for (decltype(clusterPoints.size()) c = 0; c < clusterPoints.size(); ++c)
    {
        const Point& centre = centerPoints[c];
        const cv::Scalar& colour = colorList[c];

        // Cluster centre
        std::cout << "Center point" << std::endl;
        std::cout << centre.x << "," << centre.y << std::endl;
        cv::circle(res, cv::Point(centre.x, centre.y), 6, colour, 1);

        // Cluster members
        for (const Point& member : clusterPoints[c])
        {
            std::cout << "Cluster point" << std::endl;
            std::cout << member.x << "," << member.y << std::endl;
            cv::circle(res, cv::Point(member.x, member.y), 4, colour, -1);
        }
        std::cout << separator << std::endl;
    }

    cv::imshow("K-means", res);
    cv::waitKey(0);
}
// 生成K-means例子
void generate_kmeans_example(int& clusterK, int& randMin, int& randMax, float& disThresh)
{
std::vector<Point> centerPoints;
std::vector<std::vector<Point>> clusterPoints;
std::vector<Point> randomPoints;
// 设置随机数种子
srand(time(0));
std::vector<cv::Scalar> colorList;
cv::Mat res(randMax, randMax, CV_8UC3, cv::Scalar(255, 255, 255, 0.5));
// 生成随机簇集颜色表
for (int ct_idx = 0; ct_idx < clusterK; ct_idx++)
{
colorList.push_back(cv::Scalar(rand() % 255, rand() % 255, rand() % 255));
}
// 生成随机点集
for (int pt_idx = 0; pt_idx < 100; pt_idx++)
{
int x = randIntRange(randMin, randMax);
int y = randIntRange(randMin, randMax);
Point pt(x, y);
randomPoints.push_back(pt);
}
// Inference
kmeans(randomPoints, clusterK, disThresh, centerPoints, clusterPoints);
// Show cluster results
showResult(res, centerPoints, clusterPoints, colorList);
return;
}
// Entry point: runs the K-means example with 4 clusters, a random range of
// 1..1000 and a distance threshold of 0.5.
int main()
{
    int clusterCount{4};
    int lowerBound{1};
    int upperBound{1000};
    float distanceThreshold{0.5f};

    generate_kmeans_example(clusterCount, lowerBound, upperBound, distanceThreshold);
    return 0;
}
示例结果展示