kmeans算法

// k-means.cpp : 定义控制台应用程序的入口点。
//

#include "stdafx.h"
#include <fstream>
#include <math.h>
#include <vector>
#include <iostream>

#define k 3

using namespace std;

// A 2-D data point to be clustered; attr1 and attr2 are its two coordinates.
// Both members are zero-initialized so that a default-constructed Tuple never
// carries indeterminate values.
struct Tuple
{
	float attr1 = 0.0f;
	float attr2 = 0.0f;
};

// Returns the Euclidean distance between two tuples, treating
// (attr1, attr2) as planar coordinates.
float getDistXY(Tuple t1, Tuple t2)
{
	const float d1 = t1.attr1 - t2.attr1;
	const float d2 = t1.attr2 - t2.attr2;
	const float squaredSum = d1 * d1 + d2 * d2;
	return sqrt(squaredSum);
}

// Picks the cluster whose centroid lies nearest to `tuple`.
// `means` must hold k centroids. Returns the index (0 .. k-1) of the closest
// one; when several centroids are equally close the lowest index wins.
int clusterOfTuple(Tuple means[], Tuple tuple)
{
	int nearest = 0;    // index of the best centroid seen so far
	float shortest = getDistXY(means[nearest], tuple);
	for (int candidate = 1; candidate < k; ++candidate)
	{
		const float d = getDistXY(means[candidate], tuple);
		if (!(d < shortest))
		{
			continue;    // not strictly closer: keep the current winner
		}
		shortest = d;
		nearest = candidate;
	}
	return nearest;
}

//获得给定簇集的平方误差
float getVar( vector<Tuple> clusters[], Tuple means[] )
{
	float var = 0;
	for (int i = 0; i < k; i++)
	{
		vector<Tuple> t = clusters[i];
		for (int j = 0; j < t.size(); j++)
		{
			var += getDistXY(t[j], means[i]);
		}
	}
	return var;
}

// Returns the centroid (component-wise arithmetic mean) of a cluster.
// An empty cluster has no mean; in that case a tuple with both attributes
// set to 0 is returned instead of dividing zero by zero, which would produce
// a NaN centroid and break every later distance comparison.
Tuple getMeans(vector<Tuple> cluster)
{
	Tuple t;
	t.attr1 = 0;
	t.attr2 = 0;
	if (cluster.empty())
	{
		return t;
	}

	// Accumulate in double to limit rounding error when summing many floats.
	double meansX = 0;
	double meansY = 0;
	for (size_t i = 0; i < cluster.size(); i++)
	{
		meansX += cluster[i].attr1;
		meansY += cluster[i].attr2;
	}
	const double num = static_cast<double>(cluster.size());
	t.attr1 = static_cast<float>(meansX / num);
	t.attr2 = static_cast<float>(meansY / num);
	return t;
}

void KMeans(vector<Tuple> tuples)   
{
	vector<Tuple> clusters[k];    //设置有几簇
	Tuple means[k];               //设置每簇的质心
	int i = 0;
	//默认一开始将前k个元组的值作为k个簇的质心
	for (; i < k; i++)
	{
		means[i].attr1 = tuples[i].attr1;
		means[i].attr2 = tuples[i].attr2;
	}

	//看当前元组属于哪个簇,并将其导入
	int label = 0;
	for (i = 0; i != tuples.size(); i++)
	{
		label = clusterOfTuple(means, tuples[i]);
		clusters[label].push_back(tuples[i]);
	}

	//输出刚开始的簇
	for (label = 0; label < k; label++)
	{
		cout << "第" <<label + 1 << "个簇:"<<endl;
		vector<Tuple> t = clusters[label];
		for ( i = 0; i < t.size(); i++)
		{
			cout <<"(" << t[i].attr1 <<", "<< t[i].attr2<<")"<<"\t";
		}
		cout<<endl;
	}

	float oldVar = -1;
	float newVar = getVar(clusters, means);
	while (abs(newVar - oldVar) >= 1)  //当新旧函数值即平方差不到1即则准函数值不发生明显变化时,算法终止
	{
		for (i = 0; i < k; i++)  //更新每个簇的中心点
		{
			means[i] = getMeans(clusters[i]);
		}
		oldVar = newVar;
		newVar = getVar(clusters, means);
		//清空每个簇
		for (i = 0; i < k; i++)
		{
			clusters[i].clear();
		}

		//根据新的质心获得新的簇
		for (i = 0; i != tuples.size(); i++)
		{
			label = clusterOfTuple(means, tuples[i]);
			clusters[label].push_back(tuples[i]);
		}

		//输出当前簇
		for (label = 0; label < k; label++)
		{
			cout << "第" <<label + 1 << "个簇:"<<endl;
			vector<Tuple> t = clusters[label];
			for ( i = 0; i < t.size(); i++)
			{
				cout <<"(" << t[i].attr1 <<", "<< t[i].attr2<<")"<<"\t";
			}
			cout<<endl;
		}
	}
}


int _tmain(int argc, _TCHAR* argv[])
{
	const char* fileName = "C:\\Users\\sony\\Desktop\\data.txt";
	ifstream infile(fileName);
	if (!infile.is_open())
	{
		cout<<"不能打开输入文件"<<fileName<<endl;
	}
	int count = 0;
	vector<Tuple> tuples;
	Tuple tuple;
	//从文件流中读入数据
	while (!infile.eof())
	{
		count++;
		if (count%2 == 1)
		{
			infile>>tuple.attr1;
		}else
		{
			infile>>tuple.attr2;
			tuples.push_back(tuple);
		}
	}

	//输出文件中的元组信息
	for (auto it = tuples.begin(); it != tuples.end(); it++)
	{
		cout << "(" << (*it).attr1 <<", "<<(*it).attr2<<")"<<"\t";
		
	}
	cout << endl;
	KMeans(tuples);

	system("pause");
	return 0;
}

原博文链接:点击打开链接

以上为二维的情况,一维的代码见下文。这里需要注意:用 ifstream 读取数据时,如果以 infile.eof() 作为循环结束条件,最后一个数据会被重复读入一次。原因是流的 eofbit 标记只有在读取操作试图越过文件结尾时才会被设置:读完最后一个数据后(若其后还有换行等空白字符),流仍处于正常状态,所以下一次 while 判断不会跳出;再次执行 infile>>x 时,流发现已没有数据可读,此时才会设置 eofbit 和 failbit。但这时已经进入循环体,虽然没有读到新数据,x 仍保留上一次的值,于是又被 push_back 了一次。因此下面的代码改用 while (infile>>x) 作为循环条件。

代码如下:

// text.cpp : 定义控制台应用程序的入口点。


#include "stdafx.h"
#include <fstream>
#include <math.h>
#include <vector>
#include <iostream>

#define k 3

using namespace std;

// A 1-D data point to be clustered; attr1 is its single coordinate.
// The member is zero-initialized so that a default-constructed Tuple never
// carries an indeterminate value.
struct Tuple
{
	float attr1 = 0.0f;
	/*float attr2;*/
};

// Returns the distance between two 1-D tuples, i.e. |t1.attr1 - t2.attr1|.
// Taking the absolute value directly gives the same result as
// sqrt(difference squared) but cannot overflow or underflow in the
// intermediate square.
float getDistXY(Tuple t1, Tuple t2)
{
	return fabs(t1.attr1 - t2.attr1);
}

// Determines which of the k centroids in `means` is closest to `tuple`.
// Returns that centroid's index (0 .. k-1); on equal distances the lowest
// index is kept.
int clusterOfTuple(Tuple means[], Tuple tuple)
{
	int winner = 0;    // index of the closest centroid found so far
	float winnerDist = getDistXY(means[winner], tuple);
	int idx = 1;
	while (idx < k)
	{
		const float current = getDistXY(means[idx], tuple);
		const bool strictlyCloser = current < winnerDist;
		if (strictlyCloser)
		{
			winner = idx;
			winnerDist = current;
		}
		++idx;
	}
	return winner;
}

//获得给定簇集的平方误差
float getVar( vector<Tuple> clusters[], Tuple means[] )
{
	float var = 0;
	for (int i = 0; i < k; i++)
	{
		vector<Tuple> t = clusters[i];
		for (int j = 0; j < t.size(); j++)
		{
			var += getDistXY(t[j], means[i]);
		}
	}
	return var;
}

// Returns the centroid (arithmetic mean of attr1) of a cluster.
// An empty cluster has no mean; in that case a tuple with attr1 set to 0 is
// returned instead of dividing zero by zero, which would produce a NaN
// centroid and break every later distance comparison.
Tuple getMeans(vector<Tuple> cluster)
{
	Tuple t;
	t.attr1 = 0;
	if (cluster.empty())
	{
		return t;
	}

	// Accumulate in double to limit rounding error when summing many floats.
	double meansX = 0;
	for (size_t i = 0; i < cluster.size(); i++)
	{
		meansX += cluster[i].attr1;
	}
	t.attr1 = static_cast<float>(meansX / static_cast<double>(cluster.size()));
	return t;
}

void KMeans(vector<Tuple> tuples)   
{
	vector<Tuple> clusters[k];    //设置有几簇
	Tuple means[k];               //设置每簇的质心
	int i = 0;
	//默认一开始将前k个元组的值作为k个簇的质心
	for (i = 0; i < k; i++)
	{
		means[i].attr1 = tuples[i].attr1;
		
	}

	//看当前元组属于哪个簇,并将其导入
	int label = 0;
	for (i = 0; i != tuples.size(); i++)
	{
		label = clusterOfTuple(means, tuples[i]);
		clusters[label].push_back(tuples[i]);
	}

	输出刚开始的簇
	//for (label = 0; label < k; label++)
	//{
	//	cout << "第" <<label + 1 << "个簇:"<<endl;
	//	vector<Tuple> t = clusters[label];
	//	for ( i = 0; i < t.size(); i++)
	//	{
	//		cout <<"(" << t[i].attr1 <<", "<< t[i].attr2<<")"<<"\t";
	//	}
	//	cout<<endl;
	//}

	float oldVar = -1;
	float newVar = getVar(clusters, means);
	while (abs(newVar - oldVar) >= 1)  //当新旧函数值即平方差不到1即则准函数值不发生明显变化时,算法终止
	{
		for (i = 0; i < k; i++)  //更新每个簇的中心点
		{
			means[i] = getMeans(clusters[i]);
		}
		oldVar = newVar;
		newVar = getVar(clusters, means);
		//清空每个簇
		for (i = 0; i < k; i++)
		{
			clusters[i].clear();
		}

		//根据新的质心获得新的簇
		for (i = 0; i != tuples.size(); i++)
		{
			label = clusterOfTuple(means, tuples[i]);
			clusters[label].push_back(tuples[i]);
		}

		输出当前簇
		//for (label = 0; label < k; label++)
		//{
		//	/*cout << "第" <<label + 1 << "个簇:"<<endl;
		//	vector<Tuple> t = clusters[label];
		//	for ( i = 0; i < t.size(); i++)
		//	{
		//	cout <<"(" << t[i].attr1 <<", "<< t[i].attr2<<")"<<"\t";
		//	}
		//	cout<<endl;*/
		//	cout << "第" <<label + 1 << "个簇:"<<endl;
		//	vector<Tuple> t = clusters[label];
		//	for ( i = 0; i < t.size(); i++)
		//	{
		//		cout << t[i].attr1 <<"\t";
		//	}
		//	cout<<endl;
		
	}
	//输出当前簇
		for (label = 0; label < k; label++)
		{
			/*cout << "第" <<label + 1 << "个簇:"<<endl;
			vector<Tuple> t = clusters[label];
			for ( i = 0; i < t.size(); i++)
			{
			cout <<"(" << t[i].attr1 <<", "<< t[i].attr2<<")"<<"\t";
			}
			cout<<endl;*/
			cout << "第" <<label + 1 << "个簇:"<<endl;
			vector<Tuple> t = clusters[label];
			for ( i = 0; i < t.size(); i++)
			{
				cout << t[i].attr1 <<"\t";
			}
			cout<<endl;
		}
}


// Program entry point: loads 1-D values from a whitespace-separated text file
// and clusters them with KMeans.
// Returns 0 on success, 1 if the input file cannot be opened.
int _tmain(int argc, _TCHAR* argv[])
{
	const char* fileName = "C:\\Users\\sony\\Desktop\\data.txt";
	ifstream infile(fileName, ios::in);
	if (!infile.is_open())
	{
		cout<<"不能打开输入文件"<<fileName<<endl;
		// Nothing to cluster without input; stop instead of running on no data.
		system("pause");
		return 1;
	}

	vector<Tuple> tuples;
	Tuple tuple;
	// Test the extraction itself rather than eof(): the loop ends as soon as a
	// read fails, so the last value is never appended twice.
	while (infile>>tuple.attr1)
	{
		tuples.push_back(tuple);
	}

	cout << endl;
	KMeans(tuples);

	system("pause");
	return 0;
}


  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值