计量地理-聚类

按照计量地理学的思想,对距离矩阵进行聚类分析。输入数据示例如下:

6
0.0 0.375 0.483 1.749 1.516 1.972
0.375 0.0 0.776 1.596 1.336 1.743
0.483 0.776 0.0 1.926 1.662 2.154
1.749 1.596 1.926 0.0 0.501 0.693
1.516 1.336 1.662 0.501 0.0 0.589

1.972 1.743 2.154 0.693 0.589 0.0

第一行为样本个数,其后为距离矩阵;矩阵可以只填写上三角或下三角,另一半全部为0。

// ClusterAnalysis.cpp : Defines the entry point for the console application.

//

#include "stdafx.h"
#include "iostream"
using namespace std;
#include "iomanip"


#include "vector"
#include "string"


// One recorded merge: the two class numbers that were joined and the
// clustering step in which the merge happened.
struct NEW_NODE {
	int iClaNO1;		// class number taken from the pair's first sample
	int iClaNO2;		// class number taken from the pair's second sample
	int iNO;			// ordinal of the clustering step
};


// One entry of the pairwise distance list: the numbers of the two
// samples (or classes) and the distance between them.
struct DIST_NODE {
	int iSample1;		// number of the first sample / class
	int iSample2;		// number of the second sample / class
	float fDist;		// distance between the two
};


// Sorts the first iCount elements of `sort` into ascending order, in place,
// using bubble sort.
//
// sort   - array of at least iCount ints; modified in place.
// iCount - number of elements to sort; values <= 1 leave the array untouched.
void Bubble(int *sort,int iCount)
{
	for(int i = 0;i < iCount - 1;i++){
		bool blSwapped = false;

		for(int j = 0;j < iCount - i - 1;j++){
			if(sort[j + 1] < sort[j]){
				// The temporary must be an int: a float cannot represent
				// every int above 2^24 exactly, so swapping through a
				// float would silently alter large values.
				const int temp = sort[j];
				sort[j] = sort[j + 1];
				sort[j + 1] = temp;
				blSwapped = true;
			}
		}

		// A pass without swaps means the array is already ordered.
		if(!blSwapped)
			break;
	}
}


int _tmain(int argc, _TCHAR* argv[])
{
	int iSampleCount;					// 要分类的样本数
	float **m_data;						// 存储距离矩阵	


	string strFile = "COST_MATRIX.txt";
	FILE *fp;


	if((fp=fopen(strFile.c_str(),"r"))==NULL)
	{
		printf("Cannot open this file\n");
		exit(0);


	}


	fscanf(fp,"%d",&iSampleCount);


	m_data = new float *[iSampleCount];
	for(int i = 0;i < iSampleCount;i++)
		m_data[i] = new float[iSampleCount];


	for(int i = 0;i < iSampleCount;i++)
		for(int j = 0;j < iSampleCount;j++)
			fscanf(fp,"%f",&m_data[i][j]);


	// 原始数据输出到屏幕
	/*for(int i = 0;i < iSampleCount;i++){
		for(int j = 0;j < iSampleCount;j++)
			cout << setw(5) << m_data[i][j] << "   ";
		cout << endl;
	}*/


	// 存储初始距离矩阵中的元素
	vector<DIST_NODE> vecDist;
	for(int i = 0;i < iSampleCount;i++){
		for(int j = i + 1;j < iSampleCount;j++){
			DIST_NODE temp;
			temp.fDist = m_data[i][j];
			temp.iSample1 = i + 1;
			temp.iSample2 = j + 1;
			vecDist.push_back(temp);
		}
	}
	int iVecSize = vecDist.size();	
	
	// 获取距离中的最大值
	float flMax = vecDist.at(0).fDist;
	for(int i = 0;i < iSampleCount;i++)
		if(flMax < vecDist.at(i).fDist)
			flMax = vecDist.at(i).fDist;


	cout << "开始聚类过程" << endl;
	vector<NEW_NODE> vecResult;
	vector<int> vecNO;
	int iNO = 0;
	while(true){
		vector<NEW_NODE> vecNew;
		float flMin = flMax;


		iVecSize = vecDist.size();
		if(vecDist.size() == 0)
			break;		


		if(vecDist.size() == 1){
			NEW_NODE temp;
			temp.iClaNO1 = vecDist.at(0).iSample1;
			temp.iClaNO2 = vecDist.at(0).iSample2;
			temp.iNO     = iNO +1;


			vecResult.push_back(temp);
			vecNO.push_back(iNO + 1);
			vecDist.clear();
			continue;
		}
				
		// 确定最小值		
		for(int i = 0;i < iVecSize;i++){
			if(flMin >= vecDist.at(i).fDist &&  vecDist.at(i).fDist!=-1)
				flMin = vecDist.at(i).fDist;					
		
		}


		// 根据最小值确定相应的聚类单元
		// 存储聚类单元的编号,有可能重复
		for(int i = 0;i < iVecSize;i++){			
			if(flMin == vecDist.at(i).fDist){					
				NEW_NODE temp;
				temp.iClaNO1 = vecDist.at(i).iSample1;
				temp.iClaNO2 = vecDist.at(i).iSample2;
				temp.iNO = iNO + 1;


				vecNew.push_back(temp);				


			}			
		}
		iNO++;	// 为下次聚类准备
		vecNO.push_back(iNO);


		// 剔除vecNew中重复的类别号码
		int iNewCount = vecNew.size();
		int *iNewData = new int[2 * iNewCount];		
		vector<int> vecNewDel;		// 剔除重复的点之后结果
		if(iNewCount > 1)
		{
			int iNewCount = vecNew.size();
			for(int i = 0;i < iNewCount;i++){
				iNewData[i * 2] = vecNew.at(i).iClaNO1;
				iNewData[i * 2 + 1] = vecNew.at(i).iClaNO2;
				
			}


			// 对iNewData进行冒泡排序
			Bubble(iNewData,iNewCount * 2);


			for(int i = 0;i < iNewCount * 2;i++)
			{
				for(int j = i + 1;j < iNewCount * 2;j++)
					if(iNewData[i] != iNewData[j])
					{
						vecNewDel.push_back(iNewData[i]);


						if(j == iNewCount * 2 - 1)
							vecNewDel.push_back(iNewData[i + 1]);
						break;
					}
					else
					{
						if(i == iNewCount * 2 - 2)
							vecNewDel.push_back(iNewData[i]);
						break;


					}
			}
		}		
		else
		{
			vecNewDel.push_back(vecNew.at(0).iClaNO1);
			vecNewDel.push_back(vecNew.at(0).iClaNO2);


		}


		// 确定要合并的类之后,更新vecDist栈中的内容
		// 首先将未参与聚类的样本取出,然后再计算新类到其余样本之间的距离
		vector<DIST_NODE> vecTemp;		
		for(int i = 0;i < vecDist.size();i++){
			int iSampleNo1 = vecDist.at(i).iSample1;
			int iSampleNo2 = vecDist.at(i).iSample2;


			bool blCluster = false;
			for(int j = 0;j < vecNewDel.size();j++){
				if(iSampleNo1 == vecNewDel.at(j) || 
					iSampleNo2 == vecNewDel.at(j)){
						blCluster = true;
						break;
					
				}
			}


			if(!blCluster){
				DIST_NODE temp = vecDist.at(i);
				vecTemp.push_back(temp);


			}
		}
		
		int iUnSize = vecTemp.size();
		int *iUnCluster;		
		vector<int>vecUnCluster;
		if(!vecTemp.empty()){
			iUnCluster = new int[2 * iUnSize];	
			for(int i = 0;i < iUnSize;i++){
				iUnCluster[i * 2] = vecTemp.at(i).iSample1;
				iUnCluster[i * 2 + 1] = vecTemp.at(i).iSample2;


			}


			Bubble(iUnCluster,iUnSize * 2);


			for(int i = 0;i < iUnSize * 2;i++){
				for(int j = i + 1;j < iUnSize * 2;j++)
					if(iUnCluster[i] != iUnCluster[j]){
						vecUnCluster.push_back(iUnCluster[i]);


						if(j == iUnSize * 2 - 1)
							vecUnCluster.push_back(iUnCluster[i + 1]);


						break;


					}
					else{
						if(i == iUnSize * 2 - 2)
							vecUnCluster.push_back(iUnCluster[i]);
						break;


					}
			}
		}	
		else{		// if(!vecTemp.empty())
			for(int i = 0;i < vecDist.size();i++){
				bool blCluster = false;


				int iSampleNo1 = vecDist.at(i).iSample1;
				int iSampleNo2 = vecDist.at(i).iSample2;


				for(int j = 0;j < vecNewDel.size();j++){
					if(iSampleNo1 == vecNewDel.at(j)){
						blCluster = true;
						break;
					}
				}


				if(!blCluster){
					if(vecUnCluster.empty())
						vecUnCluster.push_back(iSampleNo1);
					else{
						bool blAdd = true;
						for(int k = 0;k < vecUnCluster.size();k++){
							if(iSampleNo1 == vecUnCluster.at(k)){
								blAdd = false;
								break;
							}
						}


						if(blAdd)
							vecUnCluster.push_back(iSampleNo1);
					}
				}


				blCluster = false;
				for(int j = 0;j < vecNewDel.size();j++){
					if(iSampleNo2 == vecNewDel.at(j)){
						blCluster = true;
						break;
					}
				}


				if(!blCluster){
					if(vecUnCluster.empty())
						vecUnCluster.push_back(iSampleNo2);
					else{
						bool blAdd = true;
						for(int k = 0;k < vecUnCluster.size();k++){
							if(iSampleNo2 == vecUnCluster.at(k)){
								blAdd = false;
								break;
							}
						}


						if(blAdd)
							vecUnCluster.push_back(iSampleNo2);
					}
				}// if(!blCluster)


			}
		}


		// 计算新类到未参与聚类样本点之间的距离
		iSampleCount++;		// 标记新的样本点号
		for(int i = 0;i < vecUnCluster.size();i++){
			DIST_NODE temp;
			temp.iSample1 = vecUnCluster.at(i);
			temp.iSample2 = iSampleCount;


			float fTemp = flMax;
			// 本次聚类过程的新类由哪几个样本构成,即iSampleCount由谁构成
			for(int j = 0;j < vecNewDel.size();j++){
				int iClass = vecNewDel.at(j);


				// 在vecDist中遍历,寻找与temp.iSample1、iClass所对应的节点
				for(int k = 0;k < vecDist.size();k++){
					if((temp.iSample1 == vecDist.at(k).iSample1 && 
						  iClass == vecDist.at(k).iSample2) ||
						 (temp.iSample1 == vecDist.at(k).iSample2 &&
						  iClass == vecDist.at(k).iSample1)){
							if(fTemp > vecDist.at(k).fDist)
								fTemp = vecDist.at(k).fDist;
							break;


					}
				}
			}


			temp.fDist = fTemp;
			vecTemp.push_back(temp);
		}


		// 清空vecDist,用vecTemp中的内容进行替换
		vecDist.clear();
		for(int i = 0;i < vecTemp.size();i++)
			vecDist.push_back(vecTemp.at(i));		


		//  将每次聚类的结果保存
		for(int i = 0;i < vecNew.size();i++)
			vecResult.push_back(vecNew.at(i));
		
	}


	//  输出每次的聚类结果	
	for(int i = 0;i < vecNO.size();i++){		
		int iNO = vecNO.at(i);
		cout << "第" << iNO << "次聚类对象是:  ";


		for(int j = 0;j < vecResult.size();j++)
			if(iNO == vecResult.at(j).iNO)
				cout << vecResult.at(j).iClaNO2 << "和" 
				     << vecResult.at(j).iClaNO1 << "; ";


		cout<< endl;


	}
	
	cout << "结束聚类过程" << endl;
	system("pause");
	return 0;
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值