C_means算法

模糊C均值(C-means)算法步骤

算法概述

模糊C均值算法是一种基于隶属度的聚类方法,允许单个数据点同时属于多个聚类。每个数据点对各聚类的从属程度由一个介于0和1之间的隶属度值表示。

初始化

  • 数据输入:输入数据集,指定聚类数量 (C)、模糊系数 (m)(通常为2)、误差阈值和最大迭代次数。
  • 隶属度初始化:随机初始化隶属度矩阵,使得每个数据点对各聚类的隶属度之和为1。

更新聚类中心

  • 根据当前的隶属度矩阵计算每个聚类的中心。聚类中心是数据点的加权平均,权重是其隶属度的 (m) 次幂。

更新隶属度

  • 对于每个数据点和每个聚类,基于数据点与聚类中心之间的距离来更新隶属度。距离越近的聚类中心具有更高的隶属度。

迭代和收敛

  • 交替重复执行“更新聚类中心”和“更新隶属度”两个步骤,直到达到最大迭代次数或聚类中心的变化小于预设的误差阈值。

算法输出

  • 输出最终的聚类中心和每个数据点的隶属度矩阵。

错误率计算

  • 如果数据集包含真实类别标签,可以使用匈牙利算法等方法来计算聚类结果的错误率。
import numpy as np
from sklearn.cluster import SpectralClustering

class FuzzyCMeans:
    """Fuzzy C-Means (FCM) clustering.

    Every sample belongs to every cluster with a membership degree in
    [0, 1]; the memberships of one sample sum to 1.  The algorithm
    alternates between recomputing cluster centers (membership^m
    weighted means) and recomputing memberships (inverse relative
    distances) until the centers move less than ``error`` or
    ``max_iter`` iterations are reached.
    """

    def __init__(self, data, n_clusters=3, m=2, error=0.005, max_iter=100):
        """
        Parameters
        ----------
        data : ndarray of shape (n_samples, n_features)
            Samples to cluster.
        n_clusters : int
            Number of clusters C.
        m : float
            Fuzziness exponent (must be > 1); 2 is the common choice.
        error : float
            Stop when the Frobenius norm of the center shift falls below this.
        max_iter : int
            Maximum number of update iterations.
        """
        self.data = data
        self.n_clusters = n_clusters
        self.m = m  # Fuzziness parameter
        self.error = error  # Threshold for stopping criterion
        self.max_iter = max_iter
        self.centers = None      # (n_clusters, n_features), set by fit()
        self.membership = None   # (n_samples, n_clusters), set by fit()

    def initialize_membership(self):
        """Return a random membership matrix whose rows each sum to 1."""
        membership = np.random.rand(self.data.shape[0], self.n_clusters)
        return membership / membership.sum(axis=1, keepdims=True)

    def initialize_centers(self, X, C):
        """Pick C distinct samples of X (uniformly, without replacement)
        as the initial cluster centers.

        X: ndarray, shape (n_samples, n_features)
        C: int, number of clusters
        """
        indices = np.random.choice(X.shape[0], C, replace=False)
        return X[indices]

    def update_centers(self):
        """Recompute centers as membership^m weighted means of the data."""
        weights = np.power(self.membership, self.m)  # (n_samples, n_clusters)
        # (C, n) @ (n, f) -> (C, f), then normalize each row by its weight sum.
        return (weights.T @ self.data) / weights.sum(axis=0).reshape(-1, 1)

    def update_membership(self):
        """Recompute memberships from the current centers.

        u[i, j] = 1 / sum_k (d[i, j] / d[i, k]) ** (2 / (m - 1)),
        where d[i, j] is the distance from sample i to center j.
        """
        # Pairwise sample-to-center distances, shape (n_samples, n_clusters).
        dist = np.linalg.norm(self.data[:, None, :] - self.centers[None, :, :], axis=2)
        membership = np.zeros_like(dist)

        # Guard against division by zero: a sample that coincides with one
        # or more centers gets its membership split among those centers.
        on_center = np.isclose(dist, 0.0)
        degenerate = on_center.any(axis=1)
        if degenerate.any():
            hits = on_center[degenerate].astype(float)
            membership[degenerate] = hits / hits.sum(axis=1, keepdims=True)

        regular = ~degenerate
        if regular.any():
            d = dist[regular]
            # ratio[i, j, k] = d[i, j] / d[i, k]
            ratio = d[:, :, None] / d[:, None, :]
            membership[regular] = 1.0 / np.power(ratio, 2.0 / (self.m - 1)).sum(axis=2)
        return membership

    def fit(self):
        """Run the FCM main loop; return (centers, membership)."""
        self.membership = self.initialize_membership()
        self.centers = self.initialize_centers(self.data, self.n_clusters)
        print("初始类中心:")
        print(self.centers)
        # Bug fix: the original hard-coded "200" here regardless of max_iter.
        print(f"迭代次数:{self.max_iter}")
        for _ in range(self.max_iter):
            prev_centers = self.centers.copy()
            self.centers = self.update_centers()
            self.membership = self.update_membership()
            # Converged once the total center displacement is small enough.
            if np.linalg.norm(self.centers - prev_centers) < self.error:
                break
        return self.centers, self.membership

# Example Usage
# Assume 'data' is a numpy array with your data

# Read the data file (one sample per line).
file_path = 'iris.dat'  # replace with the actual path to the file
with open(file_path, 'r') as file:
    iris_data = file.readlines()

# Convert the text to a NumPy array: each line is split on tabs and
# every field is parsed as a float.
data_array = np.array([line.strip().split('\t') for line in iris_data], dtype=float)
# First 4 columns are features; the 5th column is the true class label.
data=data_array[:,:4]
label=data_array[:,4:]
label=label.astype("int").flatten()
# Cluster with Fuzzy C-Means (3 clusters, fuzziness m=2).
fcm = FuzzyCMeans(data, n_clusters=3, m=2,max_iter=200)
centers, membership = fcm.fit()
print("类中心:",centers)
# Hard assignment: each sample goes to its highest-membership cluster.
cluster_indices = np.argmax(membership, axis=1)
print("聚类结果",cluster_indices)

from scipy.optimize import linear_sum_assignment

def calculate_error_rate(original_labels, clustered_labels):
    """Return the clustering error rate after optimal label matching.

    Builds the contingency table between true labels and cluster ids,
    finds the one-to-one cluster-to-label assignment that maximizes
    agreement (Hungarian algorithm on the negated table), and returns
    1 - accuracy under that best matching.
    """
    n_ids = int(max(np.max(original_labels), np.max(clustered_labels))) + 1
    contingency = np.zeros((n_ids, n_ids), dtype=int)
    for truth, guess in zip(original_labels, clustered_labels):
        contingency[truth, guess] += 1

    # Maximizing matches == minimizing the negated contingency table.
    rows, cols = linear_sum_assignment(-contingency)
    matched = contingency[rows, cols].sum()

    return 1 - matched / len(original_labels)


# Error rate of the Fuzzy C-Means result versus the true labels.
error_rate = calculate_error_rate(label, cluster_indices)
print("C_means错误率:",error_rate)

# Spectral clustering baseline for comparison.
n_clusters = 3  # cluster the same data into 3 groups
clustering = SpectralClustering(n_clusters=n_clusters, affinity='nearest_neighbors',n_neighbors=50, random_state=0)
labels = clustering.fit_predict(data)
error_rate = calculate_error_rate(label, labels)
print("谱聚类错误率:",error_rate)


  • 10
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
以下是一个简单的K-Means聚类的C语言实现示例:

```c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

#define MAX_ITERATIONS 1000
#define DATA_SIZE 100
#define K 3

/* Generate n random 2-D points with coordinates in [0, 1). */
void generate_data(float data[][2], int n) {
    int i;
    srand((unsigned int)time(NULL));
    for (i = 0; i < n; i++) {
        data[i][0] = (float)(rand() % 100) / 100;
        data[i][1] = (float)(rand() % 100) / 100;
    }
}

/* Euclidean distance between (x1, y1) and (x2, y2). */
float euclidean_distance(float x1, float y1, float x2, float y2) {
    return sqrt(pow((x1 - x2), 2) + pow((y1 - y2), 2));
}

/* Initialize the k cluster centers with a farthest-point heuristic. */
void init_centers(float data[][2], int n, float centers[][2], int k) {
    int i, j, index;
    float max_dist, dist, *min_dist;
    min_dist = (float *)malloc(sizeof(float) * n);
    /* Randomly pick one point as the first cluster center. */
    index = rand() % n;
    centers[0][0] = data[index][0];
    centers[0][1] = data[index][1];
    /* Pick the remaining k-1 cluster centers. */
    for (i = 1; i < k; i++) {
        max_dist = 0;
        /* Maintain each point's minimum distance to the centers chosen
           so far (updated incrementally with the latest center), and
           remember the farthest such point. */
        for (j = 0; j < n; j++) {
            dist = euclidean_distance(data[j][0], data[j][1], centers[i-1][0], centers[i-1][1]);
            min_dist[j] = (i == 1) ? dist : fmin(dist, min_dist[j]);
            if (min_dist[j] > max_dist) {
                max_dist = min_dist[j];
                index = j;
            }
        }
        centers[i][0] = data[index][0];
        centers[i][1] = data[index][1];
    }
    free(min_dist);
}

/* Run K-Means clustering. */
void k_means(float data[][2], int n, float centers[][2], int k, int *labels, int max_iterations) {
    int i, j, iter, index;
    float dist, min_dist;
    /* Initialize every point's label. */
    for (i = 0; i < n; i++) {
        labels[i] = -1;
    }
    /* Iteratively update labels and cluster centers. */
    for (iter = 0; iter < max_iterations; iter++) {
        int changed = 0;
        for (i = 0; i < n; i++) {
            min_dist = INFINITY;
            for (j = 0; j < k; j++) {
                dist = euclidean_distance(data[i][0], data[i][1], centers[j][0], centers[j][1]);
                if (dist < min_dist) {
                    min_dist = dist;
                    index = j;
                }
            }
            if (labels[i] != index) {
                labels[i] = index;
                changed = 1;
            }
        }
        if (!changed) {
            break;
        }
        for (j = 0; j < k; j++) {
            float sum_x = 0, sum_y = 0;
            int count = 0;
            for (i = 0; i < n; i++) {
                if (labels[i] == j) {
                    sum_x += data[i][0];
                    sum_y += data[i][1];
                    count++;
                }
            }
            if (count > 0) {
                centers[j][0] = sum_x / count;
                centers[j][1] = sum_y / count;
            }
        }
    }
}

/* Print each cluster's members and its center. */
void print_result(float data[][2], int n, float centers[][2], int k, int *labels) {
    int i, j;
    for (i = 0; i < k; i++) {
        printf("Cluster %d: ", i);
        for (j = 0; j < n; j++) {
            if (labels[j] == i) {
                printf("(%f, %f) ", data[j][0], data[j][1]);
            }
        }
        printf("\nCenter: (%f, %f)\n", centers[i][0], centers[i][1]);
    }
}

int main() {
    float data[DATA_SIZE][2];
    float centers[K][2];
    int labels[DATA_SIZE];
    generate_data(data, DATA_SIZE);
    init_centers(data, DATA_SIZE, centers, K);
    k_means(data, DATA_SIZE, centers, K, labels, MAX_ITERATIONS);
    print_result(data, DATA_SIZE, centers, K, labels);
    return 0;
}
```

这个例子中首先生成了100个二维随机数据,然后用K-Means算法将其分成3类。在K-Means算法中,首先随机选择一个点作为第一个聚类中心,然后选出剩下的k-1个聚类中心。接下来,迭代更新每个点的标签和每个聚类中心的位置,直到收敛或达到最大迭代次数。最后输出每个聚类的数据点和聚类中心的位置。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

h52013141

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值