kmeans++聚类模拟,先上三分类的模拟效果:
更改k值可以修改分类数,如二分类效果:
代码实现:
#include<iostream>
#include<vector>
#include<windows.h>
#include<time.h>
using namespace std;
// Pseudo-random integer in [0, x) drawn from rand(); x must be positive.
// The argument is parenthesized so that an expression argument such as
// random(a + b) expands to rand() % (a + b) instead of rand() % a + b.
#define random(x) (rand()%(x))
// Sentinel used as an "infinitely large" starting distance in minimum searches.
#define oo 9999999
// Input samples (1-D values) to be clustered.
vector<double> v;
// v1: scratch roulette wheel (running totals) built while choosing centers.
// center: the cluster centers chosen so far.
vector<double> v1, center;
// cluster[j] collects the samples assigned to center[j].
vector<vector<double> > cluster;
// Magnitude of a: non-negative inputs are returned unchanged,
// negative inputs are returned with their sign flipped.
double abs(double a){
    return (a >= 0) ? a : a * (-1);
}
// Writes every element of c to standard output, each followed by a comma,
// then ends the line (and flushes). An empty vector prints just the newline.
// The vector is taken by const reference so that printing does not copy it.
void printV(const vector<double>& c){
    for(vector<double>::size_type idx = 0; idx < c.size(); idx++)
        cout<<c[idx]<<',';
    cout<<endl;
}
// Absolute difference between two 1-D samples.
static double kmppDistance(double a, double b){
    return a < b ? b - a : a - b;
}

// Index of the entry of the global `center` list that is closest to x.
// `center` must be non-empty; ties resolve to the lowest index.
static size_t kmppNearestCenter(double x){
    size_t best = 0;
    double bestDis = kmppDistance(x, center[0]);
    for(size_t c = 1; c < center.size(); c++){
        const double dis = kmppDistance(x, center[c]);
        if(dis < bestDis){
            bestDis = dis;
            best = c;
        }
    }
    return best;
}

// k-means++ seeding followed by a single nearest-center assignment pass.
//
// Reads the samples from the global `v`, stores the k chosen centers in the
// global `center`, and fills the global `cluster` so that cluster[j] holds
// every sample whose nearest center is center[j]. Each chosen center is
// printed as it is selected. The global `v1` is used as scratch space and is
// left empty.
//
// k: number of clusters. For k <= 0 all results are left empty. When `v` is
//    empty, `cluster` is still resized to k (all clusters empty).
//
// The function draws from rand() but never seeds it; the caller is expected
// to call srand() once beforehand.
void kmeansPlus(int k){
    // Reset all outputs so that repeated calls do not reuse stale centers
    // or append to clusters left over from a previous run.
    center.clear();
    cluster.clear();
    v1.clear();
    if(k <= 0)
        return;
    cluster.resize(k);
    const size_t n = v.size();
    if(n == 0)
        return; // nothing to sample from; also avoids rand() % 0

    // The first center is a uniformly random sample.
    const double first = v[random(n)];
    cout<<"第1个聚类中心:"<<first<<endl;
    center.push_back(first);

    // Pick centers until there are k of them.
    while(center.size() < static_cast<size_t>(k)){
        // Build the roulette wheel: v1[j] is the running total of the squared
        // distance from each of v[0..j] to its nearest existing center
        // (k-means++ samples proportionally to the squared distance).
        v1.clear();
        v1.reserve(n);
        double total = 0;
        for(size_t j = 0; j < n; j++){
            const double dis = kmppDistance(v[j], center[kmppNearestCenter(v[j])]);
            total += dis * dis;
            v1.push_back(total);
        }

        size_t pick = 0;
        if(total > 0){
            // Roulette pointer in [0, total).
            const double p = rand() / (RAND_MAX + 1.0) * total;
            // Select the first slot whose running total exceeds the pointer.
            // The strict comparison means zero-width slots (samples that
            // coincide with an existing center) are never selected.
            while(pick + 1 < n && v1[pick] <= p)
                pick++;
        }
        else{
            // Every sample coincides with an existing center (k exceeds the
            // number of distinct values): fall back to a uniform pick so the
            // loop still terminates.
            pick = random(n);
        }

        // size() is unsigned; convert explicitly to match the %d conversion.
        printf("第%d个聚类中心:%.2f\n", static_cast<int>(center.size() + 1), v[pick]);
        center.push_back(v[pick]); // next cluster center
    }
    v1.clear();

    // Clustering: assign every sample to its nearest center.
    for(size_t j = 0; j < n; j++)
        cluster[kmppNearestCenter(v[j])].push_back(v[j]);
}
// Generates 100 synthetic 1-D samples in the global `v`, clusters them with
// kmeansPlus, and prints the contents of each resulting cluster.
int main()
{
    // Seed the generator once from wall-clock time. clock() reports processor
    // time since program start, so seeding from it gives almost the same
    // seeds on every run, and reseeding before every draw (with a sleep in
    // between) only slows the program down.
    srand(static_cast<unsigned>(time(NULL)));
    for(int i = 0; i < 100; i++){
        if(i % 10 == 0){
            // Every tenth sample simulates an outlier: 10 + [-19.8, 20.0].
            v.push_back(10 + (random(200) - 99) / 5.0);
        }
        else{
            // Regular sample close to 10: 10 + [-0.5, 0.45].
            v.push_back(10 + (random(20) - 10) / 20.0);
        }
    }
    const int k = 2; // number of clusters; change to alter the classification
    kmeansPlus(k);
    for(int i = 0; i < k; i++){
        printf("第%d类\n", i+1);
        printV(cluster[i]);
    }
    return 0;
}
2分类效果: