K-means实现(C++)

本文介绍了K-均值聚类算法的详细步骤,包括随机初始化中心点、计算点到中心点的距离、更新簇集中心等,并提供了一个C++代码示例来演示算法的运行过程。通过不断迭代,当簇集中心点的变化小于设定阈值时停止,实现了对数据点的有效聚类。
摘要由CSDN通过智能技术生成

k-means步骤

步骤1:随机设置K个中心点(下方示例代码中,点集本身是随机生成的,因此直接取点集的前K个点作为初始中心点)

步骤2:对每个点计算其到各中心点的距离,选取距离最近的中心点,并将该点归入该中心点对应的聚类簇集

步骤3:计算并得到每个聚类簇集的重心(点集的均值)

步骤4:计算每个簇集的重心与其原中心点的距离,并将中心点更新为该重心;如果所有簇集中最大的距离>设定阈值,重新进入步骤2;否则跳出

代码实现

/* k - means */
// Plain 2-D point with integer coordinates; both coordinates are publicly
// readable and writable.
class Point
{
public:
    int x = 0;  // horizontal coordinate, 0 unless a constructor sets it
    int y = 0;  // vertical coordinate, 0 unless a constructor sets it

    // Creates the point (0, 0) via the in-class initializers above.
    Point() = default;

    // Creates the point (x, y).
    Point(int x, int y) : x(x), y(y) {}

    ~Point() = default;
};


/**
 * @brief Returns a pseudo-random integer in the half-open range [min, max).
 *
 * Values are drawn from rand(), so the sequence is determined by whatever
 * seed the caller installed with srand().
 *
 * @param min  Inclusive lower bound.
 * @param max  Exclusive upper bound.
 * @param seed Not read by this function; kept so existing call sites that
 *             pass it still compile.
 * @return A value v with min <= v < max, or @p min when the range is empty
 *         or inverted (max <= min).
 */
int randIntRange(int min, int max, time_t* seed = nullptr)
{
    (void)seed;

    // With max == min the modulus would be zero (undefined behaviour) and
    // with max < min it would be negative, so fall back to the lower bound.
    if (max <= min)
    {
        return min;
    }

    // Widen before subtracting so that max - min cannot overflow int.
    const long long span = static_cast<long long>(max) - min;
    return static_cast<int>(min + rand() % span);
}

/**
 * @brief Euclidean (straight-line) distance between two points.
 *
 * @param p1 First point.
 * @param p2 Second point.
 * @return sqrt((p2.x - p1.x)^2 + (p2.y - p1.y)^2), narrowed to float.
 */
float distanceEuclidean(Point p1, Point p2)
{
    // Widen to double before subtracting and squaring: done in int, the
    // square overflows (undefined behaviour) once a coordinate difference
    // exceeds about 46340.
    const double dx = static_cast<double>(p2.x) - static_cast<double>(p1.x);
    const double dy = static_cast<double>(p2.y) - static_cast<double>(p1.y);
    return static_cast<float>(std::sqrt(dx * dx + dy * dy));
}


void clusterMinDis(std::vector<Point>& src, int k, std::vector<Point>& center_pts, std::vector<std::vector<Point>>& dst)
{
    // 设置为k个簇集
    dst.resize(k);

    // 遍历所有其他点
    // 重置k个中心簇集
    for (int i = 0; i < k; i++)
    {
        dst[i].clear();
    }

    for (int j = 0; j < src.size(); j++)
    {
        // 计算其他点到各个中心点距离,进入最近中心点的簇集中
        int min_dis_center_idx = 0;
        float min_dis = 9999;
        for (int center_idx = 0; center_idx < k; center_idx++)
        {
            Point cur_center_pt = center_pts[center_idx];
            float distance = distanceEuclidean(src[j], center_pts[center_idx]);
            // 距离判断
            if (distance < min_dis)
            {
                min_dis = distance;
                min_dis_center_idx = center_idx;
            }
        }
        // 放入中心簇集
        dst[min_dis_center_idx].push_back(src[j]);
    }
}

void kmeans(std::vector<Point>& src, int k, float min_dis_thresh, std::vector<Point>& center_pts, std::vector<std::vector<Point>>& dst)
{
    int pt_num = src.size();

    // 中心点数量 > 点集数量
    if (k > pt_num) return;

    // 初始化设置前k个点为中心点
    center_pts.clear();
    for (int i = 0; i < k; i++)
    {
        center_pts.push_back(src[i]);
    }

    float max_center_change_dist = 9999;

    // 簇集中最大的中心点距离变化值 > 阈值:需要更新簇集中心
    while (max_center_change_dist > min_dis_thresh)
    {

        max_center_change_dist = 0;
        clusterMinDis(src, k, center_pts, dst);

        // 计算所有簇集平均中心
        std::vector<Point> mean_cluster_pt(k);
        for (int i = 0; i < k; i++)
        {
            std::cout << "Cluster pt number:" << dst[i].size() << std::endl;
            Point center(0,0);

            // 簇集无元素,跳过
            if (dst[i].size() == 0)
            {
                continue;
            }

            for (int j = 0; j < dst[i].size(); j++)
            {
                center.x += dst[i][j].x;
                center.y += dst[i][j].y;
            }
            center.x /= dst[i].size();
            center.y /= dst[i].size();

            // 如果均值中心点距离原始中心点>设定值,更新中心点
            float cur_center_change_dist = distanceEuclidean(center, center_pts[i]);

            // 比较得到最大的中心点距离变化值
            if (cur_center_change_dist > max_center_change_dist)
            {
                max_center_change_dist = cur_center_change_dist;
            }
            std::cout << "更新簇集中心" << std::endl;
            std::cout << "中心点变化距离:"<< max_center_change_dist <<",阈值:"<< min_dis_thresh << std::endl;
            std::cout << center.x << "," << center.y << std::endl;
            center_pts[i] = center;
        }
    }
}


/**
 * @brief Prints the clustering result and draws it in an OpenCV window.
 *
 * For every cluster, the centre is printed and drawn as an outlined circle
 * of radius 6, and each member point is printed and drawn as a filled circle
 * of radius 4, all in that cluster's colour. The image is then shown in a
 * window titled "K-means" and the function waits for a key press.
 *
 * @param res           Image that is drawn on and displayed.
 * @param centerPoints  Centre of each cluster, indexed like @p clusterPoints.
 * @param clusterPoints Member points of each cluster.
 * @param colorList     Drawing colour of each cluster, indexed like
 *                      @p clusterPoints.
 */
void showResult(cv::Mat& res, std::vector<Point>& centerPoints, std::vector<std::vector<Point>>& clusterPoints, std::vector<cv::Scalar>& colorList)
{
    const char* const separator = "=======================";

    std::cout << separator << std::endl;
    for (std::size_t cluster_idx = 0; cluster_idx < clusterPoints.size(); ++cluster_idx)
    {
        const Point& center = centerPoints[cluster_idx];
        const cv::Scalar& color = colorList[cluster_idx];

        // Centre: thickness 1 gives an outline only.
        std::cout << "Center point" << std::endl;
        std::cout << center.x << "," << center.y << std::endl;
        cv::circle(res, cv::Point(center.x, center.y), 6, color, 1);

        // Members: thickness -1 gives a filled disc.
        for (const Point& member : clusterPoints[cluster_idx])
        {
            std::cout << "Cluster point" << std::endl;
            std::cout << member.x << "," << member.y << std::endl;
            cv::circle(res, cv::Point(member.x, member.y), 4, color, -1);
        }
        std::cout << separator << std::endl;
    }

    cv::imshow("K-means", res);
    cv::waitKey(0);
}

// 生成K-means例子
void generate_kmeans_example(int& clusterK, int& randMin, int& randMax, float& disThresh)
{
    std::vector<Point> centerPoints;
    std::vector<std::vector<Point>> clusterPoints;
    std::vector<Point> randomPoints;

    // 设置随机数种子
    srand(time(0));
    std::vector<cv::Scalar> colorList;
    cv::Mat res(randMax, randMax, CV_8UC3, cv::Scalar(255, 255, 255, 0.5));

    // 生成随机簇集颜色表
    for (int ct_idx = 0; ct_idx < clusterK; ct_idx++)
    {
        colorList.push_back(cv::Scalar(rand() % 255, rand() % 255, rand() % 255));
    }

    // 生成随机点集
    for (int pt_idx = 0; pt_idx < 100; pt_idx++)
    {
        int x = randIntRange(randMin, randMax);
        int y = randIntRange(randMin, randMax);
        Point pt(x, y);
        randomPoints.push_back(pt);
    }

    // Inference
    kmeans(randomPoints, clusterK, disThresh, centerPoints, clusterPoints);

    // Show cluster results
    showResult(res, centerPoints, clusterPoints, colorList);
    return;
}
// Entry point: runs the K-means example with a fixed configuration.
int main()
{
    // generate_kmeans_example takes its arguments by non-const reference,
    // so each setting has to live in a named variable.
    int clusterK{4};
    int randMin{1};
    int randMax{1000};
    float disThresh{0.5f};

    generate_kmeans_example(clusterK, randMin, randMax, disThresh);
    return 0;
}

示例结果展示

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值