人工智能考试——聚类分析

一、题目

使用 k-means 聚类算法对所提供的数据集进行聚类分析。修改所提供的 kmeans.c 文件,使其读取二维数据集 clusterSet.txt,分别在 k=2 和 k=4 两种情况下对该数据集进行聚类,并输出最终的聚类中心以及每一行数据所属的簇。

二、代码

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "maths.h"
#include "rand.h"
/* Size, in bytes, of a text line buffer. */
#define MAX_LINE 1024
/*
* Features and Feature Vector Symbolic Constants and Types
*/

/* Number of samples held in fvec[]; every loop over the data set runs to this bound. */
#define MAX_FEATURE_VECTORS	80
/* Number of feature values stored per sample (the data set is two-dimensional). */
#define MAX_FEATURES		2


/*
 * One sample of the data set: its feature values together with the index
 * of the cluster it is currently assigned to.
 */
typedef struct {
	int class;	/* cluster index (0 .. K-1), or -1 while the sample is not yet clustered */
	float features[MAX_FEATURES];	/* feature values, one per dimension */
} feature_vector_t;

/* Prototype feature vectors: the samples being clustered, one entry per data row. */
feature_vector_t fvec[MAX_FEATURE_VECTORS];



/* NOTE(review): no active code in the visible source reads or writes this table -- confirm whether it is still needed. */
float feature_strings[MAX_FEATURES][MAX_FEATURES];

/*
 * Number of clusters. This is a compile-time constant, so clustering with a
 * different k (the exercise asks for both 2 and 4) means editing this value
 * and rebuilding.
 */
#define K		2

/* Cluster centroids: centroids[c][f] is the mean of feature f over the members of cluster c. */
double centroids[K][MAX_FEATURES];



/*
 * Load the sample points from clusterSet.txt into fvec[].
 *
 * Every row of the file is expected to hold two whitespace-separated
 * floating-point values, which are stored in features[0] and features[1]
 * of the corresponding fvec[] entry. At most MAX_FEATURE_VECTORS rows are
 * read.
 *
 * Failure handling:
 *   - If the file cannot be opened, the reason is reported with perror()
 *     and fvec[] is left untouched.
 *   - If the file ends, or a row cannot be parsed, before
 *     MAX_FEATURE_VECTORS rows were read, a warning is written to stderr
 *     and the remaining fvec[] entries are left untouched.
 *
 * The function returns nothing; callers cannot distinguish these cases.
 */
void initialize_prototype_vectors(void)
{
	int vector;
	float first, second;
	FILE *fp;            /* input file handle */

	fp = fopen("C:\\Users\\15721\\Desktop\\大三计算机课程PPt\\人工智能\\工程\\机器学习\\机器学习算法\\clusterSet.txt", "r");
	if (fp == NULL) {
		perror("fail to read");
		return;
	}

	printf("读取clusterSet.txt文件成功!\n");

	for (vector = 0; vector < MAX_FEATURE_VECTORS; vector++) {
		/*
		 * fscanf returns the number of values it converted; anything
		 * other than 2 means the row is missing or malformed. Parse into
		 * temporaries so a half-read row never reaches fvec[].
		 */
		if (fscanf(fp, "%f %f", &first, &second) != 2) {
			fprintf(stderr, "warning: only %d of %d rows could be read\n",
				vector, MAX_FEATURE_VECTORS);
			break;
		}

		fvec[vector].features[0] = first;
		fvec[vector].features[1] = second;
	}

	/* Release the handle on every path that opened it. */
	fclose(fp);
}


/*
 * Give every sample its starting cluster label: the first K samples seed
 * clusters 0 .. K-1 (one sample each) and every other sample is marked -1.
 */
void initialize_membership(void)
{
	int idx = 0;

	/* Seed each cluster with one sample. */
	while (idx < K && idx < MAX_FEATURE_VECTORS) {
		fvec[idx].class = idx;
		idx++;
	}

	/* The rest of the samples start out unassigned. */
	while (idx < MAX_FEATURE_VECTORS) {
		fvec[idx].class = -1;
		idx++;
	}
}


/*
 * Euclidean distance between sample fvec[proto_vector] and the centroid of
 * cluster `centroid`, taken over all MAX_FEATURES dimensions.
 *
 * Neither index is range-checked.
 */
double geometricDistance(int proto_vector, int centroid)
{
	const float *point = fvec[proto_vector].features;
	const double *center = centroids[centroid];
	double sum_sq = 0.0;
	int dim;

	for (dim = 0; dim < MAX_FEATURES; dim++) {
		double delta = (double)point[dim] - center[dim];

		sum_sq += delta * delta;
	}

	return sqrt(sum_sq);
}


/*
 * Recompute the centroid of `cluster` as the per-feature mean of every
 * sample in fvec[] whose class equals `cluster`.
 *
 * If the cluster currently has no members, its stored centroid is left
 * unchanged. Dividing the zero sum by a zero count would otherwise store
 * NaN, and a NaN centroid can never be the nearest one again.
 */
void compute_centroids(int cluster)
{
	double sum[MAX_FEATURES];
	int proto_vector, feature;
	int total = 0;

	for (feature = 0; feature < MAX_FEATURES; feature++) {
		sum[feature] = 0.0;
	}

	/* Accumulate the members of this cluster and count them. */
	for (proto_vector = 0; proto_vector < MAX_FEATURE_VECTORS;
		proto_vector++) {

		if (fvec[proto_vector].class != cluster) {
			continue;
		}

		for (feature = 0; feature < MAX_FEATURES; feature++) {
			sum[feature] += (double)fvec[proto_vector].features[feature];
		}

		total++;
	}

	/* Empty cluster: there is no mean to compute, keep the old centroid. */
	if (total == 0) {
		return;
	}

	for (feature = 0; feature < MAX_FEATURES; feature++) {
		centroids[cluster][feature] = sum[feature] / (double)total;
	}
}


/*
 * Return the index of the cluster whose centroid is closest to sample
 * fvec[proto_vector], as measured by geometricDistance().
 *
 * Ties go to the lowest cluster index. The running minimum is seeded from
 * the first usable distance instead of a fixed sentinel, so the result is
 * correct whatever the scale of the data. A distance that is NaN is
 * skipped; if no cluster yields a usable distance, 0 is returned.
 */
int partition_feature_vector(int proto_vector)
{
	int cluster, best = 0;
	int have_best = 0;
	double gdCur, gdBest = 0.0;

	for (cluster = 0; cluster < K; cluster++) {

		gdCur = geometricDistance(proto_vector, cluster);

		/* NaN is the only value that compares unequal to itself. */
		if (gdCur != gdCur) {
			continue;
		}

		if (!have_best || gdCur < gdBest) {
			best = cluster;
			gdBest = gdCur;
			have_best = 1;
		}

	}

	return best;
}


/*
 * Run k-means until a full pass over the samples moves none of them.
 *
 * Each pass walks fvec[] from the last sample down to the first. A sample
 * whose nearest centroid differs from its current class is relabelled at
 * once, and the centroids of the cluster it left (if it had one) and of
 * the cluster it joined are recomputed before the next sample is examined.
 */
void k_means_clustering(void)
{
	int changed;

	do {
		int idx;

		changed = 0;

		for (idx = MAX_FEATURE_VECTORS; idx-- > 0; ) {
			int nearest = partition_feature_vector(idx);
			int previous = fvec[idx].class;

			if (nearest == previous) {
				continue;
			}

			fvec[idx].class = nearest;

			/* -1 marks a sample that had no cluster yet, so nothing to update there. */
			if (previous != -1) {
				compute_centroids(previous);
			}
			compute_centroids(nearest);

			changed = 1;
		}
	} while (changed);
}


/*
 * Print the members of every cluster to stdout.
 *
 * For each cluster index 0 .. K-1 a heading is written, followed by one
 * line per member sample showing its index in fvec[] and its feature
 * values, followed by a blank line.
 */
void emit_clusters(void)
{
	int label;

	for (label = 0; label < K; label++) {
		int row;

		printf("Class %d contains:\n\n", label);

		for (row = 0; row < MAX_FEATURE_VECTORS; row++) {
			int col;

			/* Only samples assigned to the cluster being listed. */
			if (fvec[row].class != label) {
				continue;
			}

			printf("\t%2d  [", row);
			for (col = 0; col < MAX_FEATURES; col++) {
				printf("%f ", fvec[row].features[col]);
			}
			printf("]\n");
		}

		printf("\n");
	}
}


/*
 * Program entry point: load the samples, seed the clusters, run k-means,
 * then print each cluster's centroid followed by the cluster memberships.
 *
 * Always returns 0.
 */
int main()
{
	int i, feature;

	initialize_prototype_vectors();

	initialize_membership();

	/* Initial centroids: each cluster starts with exactly its seed sample. */
	for (i = 0; i < K; i++) {
		compute_centroids(i);
	}

	k_means_clustering();

	printf("下面是聚类中心:\n");
	for (i = 0; i < K; i++) {
		/* Print every coordinate of centroid i (valid indices are [0, K) and [0, MAX_FEATURES)). */
		printf("class %d 中心是:", i);
		for (feature = 0; feature < MAX_FEATURES; feature++) {
			printf(" %f", centroids[i][feature]);
		}
		printf("\n");
	}
	printf("下面是具体的分类:\n");
	emit_clusters();

	/* Keeps the console window open when started from a Windows GUI. */
	system("pause");
	return 0;
}


 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值