1、k-means:
目的:将n个对象分成k个簇,使得结果簇内的相似度高,而簇间的相似度低,且每个簇至少有一个对象,每个对象只属于一个簇。
思想:在所有对象中指定k个对象作为簇心(也可以自己设定不在对象中的点),计算各个对象到簇心的欧氏距离,并将对象划分到离自己最近的簇心所在的簇中,然后重新计算每个簇的簇心,通过迭代的方法再次计算各个对象到簇心的距离并重新划分,直到符合聚类结果的衡量标准(标准见下文)。
使用算法:迭代
聚类结果衡量标准:(1)各个簇中的点不再变化(2)设定一个准则函数(比如,所有对象到其所属簇心的欧几里德距离的平方和),当准则函数的变化或各个簇心的变化在某一个阈值之内时停止迭代
#include <iostream>
#include <vector>
#include <math.h>
#define _NUM 3
using namespace std;
/**
 * A single object to be clustered: a point described by two integer
 * attributes.
 */
struct Dot
{
    int attrX;  // first attribute of the point
    int attrY;  // second attribute of the point
};
/**
 * Computes the Euclidean distance between two points.
 *
 * The coordinate differences are widened to double before they are squared,
 * so far-apart points cannot overflow int arithmetic.
 *
 * @param dot1 first point
 * @param dot2 second point
 * @return the straight-line distance between dot1 and dot2
 */
double getDist(Dot dot1, Dot dot2)
{
    const double dx = static_cast<double>(dot1.attrX) - static_cast<double>(dot2.attrX);
    const double dy = static_cast<double>(dot1.attrY) - static_cast<double>(dot2.attrY);
    return sqrt(dx * dx + dy * dy);
}
/**
 * Computes the center of a cluster.
 *
 * Each coordinate of the result is the arithmetic mean of that coordinate
 * over all points of the cluster, truncated toward zero because Dot stores
 * integer attributes.
 *
 * @param c the points that belong to the cluster
 * @return the center of c; (0, 0) when c contains no points
 */
Dot getCenter(vector<Dot> c)
{
    Dot dot_center;
    dot_center.attrX = 0;
    dot_center.attrY = 0;
    if (c.empty())
    {
        // Nothing to average: dividing by a zero point count would be undefined.
        return dot_center;
    }

    // Accumulate in a wider type so that summing many coordinates cannot overflow int.
    long long total_X = 0;
    long long total_Y = 0;
    for (vector<Dot>::size_type i = 0; i < c.size(); ++i)
    {
        total_X += c[i].attrX;
        total_Y += c[i].attrY;
    }

    const long long num = static_cast<long long>(c.size());
    dot_center.attrX = static_cast<int>(total_X / num);
    dot_center.attrY = static_cast<int>(total_Y / num);
    return dot_center;
}
/**
准则函数
**/
double getE(vector<Dot> c[], Dot center[])
{
double sum = 0.0;
for (int i = 0; i < _NUM; i++)
{
vector<Dot> v = c[i];
for (int j = 0; j < v.size(); j++)
{
sum += (v[j].attrX - center[i].attrX) * (v[j].attrX - center[i].attrX) + (v[j].attrY - center[i].attrY) * (v[j].attrY - center[i].attrY);
}
}
cout << "sum:" << sum << endl;
return sum;
}
/**
 * Finds the cluster whose center is closest to a point.
 *
 * Squared Euclidean distances are compared (the square root is not needed
 * to find the minimum). They are computed in double so that large
 * coordinate differences cannot overflow int arithmetic.
 *
 * @param d      the point to classify
 * @param center array of _NUM cluster centers
 * @return the index of the nearest center; on a tie the lowest index wins
 */
int chooseClass(Dot d, Dot center[_NUM])
{
    double best = 0.0;
    int toC = 0;
    for (int i = 0; i < _NUM; i++)
    {
        const double dx = static_cast<double>(d.attrX) - static_cast<double>(center[i].attrX);
        const double dy = static_cast<double>(d.attrY) - static_cast<double>(center[i].attrY);
        const double squared = dx * dx + dy * dy;
        // The first center is always the initial candidate; later ones must be strictly closer.
        if (i == 0 || squared < best)
        {
            best = squared;
            toC = i;
        }
    }
    return toC;
}
/**
k-Means算法
**/
void k_Means(vector<Dot> init)
{
vector<Dot> classes[_NUM];//簇数组
Dot center[_NUM];//各个簇中心
int c;
double newE = -1, oldE = -1;
for (int i = 0; i < _NUM; i++)
{
cin >> c;
classes[i].push_back(init[c - 1]);
center[i] = getCenter(classes[i]);
cout << "center[" << i << "]:" << center[i].attrX << "\t" << center[i].attrY << endl;
}
newE = getE(classes,center);
cout << "oldE:" << oldE << "\tnewE:" << newE << endl;
for (int i = 0; i < _NUM; i++)//清空每个簇
{
classes[i].clear();
}
double tmp = newE > oldE ? (newE - oldE) : (oldE - newE);
while (tmp >= 1.0 )
{
for (int i = 0; i < init.size(); i++)//将各个点放在离它最近的簇中
{
int toC = chooseClass( init[i], center);
classes[toC].push_back(init[i]);
}
cout << "------------------------------" << endl;
for (int i = 0; i < _NUM; i++)//打印每个簇中所包含的点
{
cout << "簇" << i + 1 << ":" << endl;
for (int j = 0; j < classes[i].size(); j++)
{
cout << classes[i][j].attrX << "\t" << classes[i][j].attrY << endl;
}
}
cout << "------------------------------" << endl;
for (int i = 0; i < _NUM; i++)//更新每个簇的中心点
{
center[i] = getCenter(classes[i]);
cout << "center[" << i << "]:" << center[i].attrX << "\t" << center[i].attrY << endl;
}
oldE = newE;
newE = getE(classes, center);
for (int i = 0; i < _NUM; i++)//清空每个簇
{
classes[i].clear();
}
tmp = newE > oldE ? (newE - oldE) : (oldE - newE);
}
}
/**
 * Program entry point.
 *
 * Reads pairs of integers from standard input, one point per pair, until a
 * read fails or either value of a pair equals -1 (so "-1 -1" ends the list,
 * and a point with a coordinate of -1 cannot be entered). The collected
 * points are then passed to k_Means.
 *
 * @param args number of command-line arguments (unused)
 * @param arg  command-line arguments (unused)
 * @return 0 on normal completion
 */
int main(int args, char* arg[])
{
    vector<Dot> init;  // holds every point that was read
    int n1;
    int n2;
    while ((cin >> n1 >> n2) && n1 != -1 && n2 != -1)  // read the attribute values of each point
    {
        Dot dot;
        dot.attrX = n1;
        dot.attrY = n2;
        init.push_back(dot);
    }
    k_Means(init);  // run the k-means cluster analysis
    return 0;       // a zero exit status reports success to the caller
}