Affinity Propagation算法的python实现和分析

最新推荐文章于 2024-07-06 02:33:12 发布

那时那月那人

最新推荐文章于 2024-07-06 02:33:12 发布

阅读量1k

点赞数

文章标签：机器学习聚类算法

本文链接：https://blog.csdn.net/xiaoxu1025/article/details/104341874

版权

Affinity Propagation 算法的更新方式很简单：先更新 r 矩阵，再更新 a 矩阵。

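公式如下：

公式1（吸引度 r 的更新）：

$$r(i,k) \leftarrow s(i,k) - \max_{k' \neq k}\bigl\{a(i,k') + s(i,k')\bigr\}$$

公式2（归属度 a 的更新）：

$$a(i,k) \leftarrow \min\Bigl\{0,\; r(k,k) + \sum_{i' \notin \{i,k\}} \max\{0,\, r(i',k)\}\Bigr\}, \quad i \neq k$$

$$a(k,k) \leftarrow \sum_{i' \neq k} \max\{0,\, r(i',k)\}$$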

其中 s 是相似度矩阵，一般可以直接用负的距离来表示相似度（距离越小，相似度越高）。

r 表示吸引度（responsibility），a 表示归属度（availability）。

这里就用选举来进行形象讲解下

对于公式1

就好比是一个投票阶段：s(i, k) 表示 i 选 k 作为总统（聚类中心）的合适度，r(i, k) 表示 i 选 k 作为总统的意向有多大。

直观来说，直接用 r(i, k) = s(i, k) - max{s(i, k')}（k' != k）貌似更加形象。但是人是感性动物，会受别人影响，这里就是 a 起到的作用：比如 i 的亲人朋友都投了 k' 当总统，就会来问我要不要和他们一样投 k'——大家都在投 k'，你也快点投 k'。于是我经过深思熟虑之后，最终投给 k 的意向会受到影响（影响的大小取决于 a(i, k') 的大小）。所以 r(i, k) = s(i, k) - max{s(i, k') + a(i, k')}（k' != k）才是我最终投给 k 的意向。

对于公式2

就像是一个拉票阶段：a(i, k) 可以形象地理解为 k 对 i 的影响力，它的大小受其他 i' 对 k 的支持程度影响。如果亲人朋友 i' 都投了 k，那么 i 也有很大概率投 k，这就是所谓的随波逐流。投 k 的 i' 越多，a(i, k) 就越大，也就是说 k 对 i 的影响力越大，对公式1的影响也就越大。这里加上 r(k, k) 也很好理解：我自己对自己也有个投票意向，虽然大家都投了我，但是我自己这一票肯定还是要算进来的，万一比别人就差这一票不是亏大了。

迭代停止的条件是所有样本的聚类中心都不再变化，或者连续迭代了 n 次都没有变化（n 的值可以自己取）。

代码如下：

from sklearn.datasets.samples_generator import make_blobs
import matplotlib.pyplot as plt
import numpy as np

# Synthetic demo data: 300 two-dimensional points generated by make_blobs
# around the three centres below, with a fixed random_state so every run
# produces the same sample. labels_true is not used by the code that follows.
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=300, centers=centers, cluster_std=0.4,
                            random_state=0)


def calc_similarity(X):
    """Build the pairwise similarity matrix for the rows of ``X``.

    The similarity of two samples is the negated Euclidean distance between
    them. Every diagonal entry (the "preference" of a sample for being its
    own exemplar) is then overwritten with the value returned by
    ``calc_median`` for the matrix.

    Args:
        X: 2-D NumPy array of shape ``(m, n)``: ``m`` samples, ``n`` features.

    Returns:
        An ``(m, m)`` array of similarities.
    """
    m, n = np.shape(X)
    # Broadcasting an (m, 1, n) array against a (1, m, n) copy yields every
    # pairwise difference in one (m, m, n) array.
    as_rows = X.reshape((m, 1, n))
    as_cols = np.copy(X).reshape(1, m, n)
    squared_dist = np.square(as_rows - as_cols).sum(axis=-1)
    similarity = -1 * np.sqrt(squared_dist)
    # Preference: the same value, taken from the off-diagonal entries, for
    # every sample.
    np.fill_diagonal(similarity, calc_median(similarity))
    return similarity


def calc_median(X):
    """Return the median of the off-diagonal entries of a 2-D matrix.

    The previous implementation returned the element at the middle *position*
    of the unsorted off-diagonal values, which is not the median. This
    version computes the real median with ``np.median``; for an even number
    of values that is the mean of the two middle ones.

    Args:
        X: 2-D array-like (typically a square similarity matrix).

    Returns:
        The median of all entries ``X[i][j]`` with ``i != j``.

    Raises:
        ValueError: if ``X`` is not 2-D or has no off-diagonal entries.
    """
    matrix = np.asarray(X)
    if matrix.ndim != 2:
        raise ValueError("calc_median expects a 2-D matrix")
    # Boolean mask that is False exactly on the main diagonal.
    off_diagonal = ~np.eye(*matrix.shape, dtype=bool)
    values = matrix[off_diagonal]
    if values.size == 0:
        raise ValueError("matrix has no off-diagonal entries")
    return np.median(values)


def affinityPropagation(similarity, lamda=0., max_iter=200, convergence_iter=1):
    """Run affinity propagation message passing on a similarity matrix.

    Two matrices are updated alternately:

    * responsibility ``r(i, k) = s(i, k) - max_{k' != k}(a(i, k') + s(i, k'))``
    * availability ``a(i, k) = min(0, r(k, k) + sum_{i' not in {i, k}}
      max(0, r(i', k)))`` for ``i != k``, and
      ``a(k, k) = sum_{i' != k} max(0, r(i', k))``.

    Each new value is blended with the old one:
    ``new = lamda * old + (1 - lamda) * update``.

    Fixes relative to the previous implementation:

    * ``r`` and ``a`` are floating point. They used to be ``int32``, which
      truncated every update towards zero.
    * The availability sum for ``i != k`` now excludes ``r(k, k)``, which was
      previously counted twice when positive.
    * The first iteration is never compared against a made-up all-zero
      assignment.
    * The loop is bounded by ``max_iter``.

    Args:
        similarity: square ``(m, m)`` similarity matrix; the diagonal holds
            the preferences.
        lamda: damping factor. ``0`` means no damping.
        max_iter: maximum number of message-passing iterations.
        convergence_iter: number of consecutive iterations in which the
            per-sample exemplar choice must stay unchanged before stopping.

    Returns:
        The ``(m, m)`` float matrix ``r + a``. The arg-max of row ``i`` is the
        exemplar chosen by sample ``i``.

    Raises:
        ValueError: if ``similarity`` is not a square 2-D matrix or has fewer
            than two samples.
    """
    import warnings

    similarity = np.asarray(similarity, dtype=float)
    if similarity.ndim != 2 or similarity.shape[0] != similarity.shape[1]:
        raise ValueError("similarity must be a square 2-D matrix")
    m = similarity.shape[0]
    if m < 2:
        raise ValueError("affinity propagation needs at least two samples")

    # Responsibility and availability matrices, both floating point.
    r = np.zeros_like(similarity)
    a = np.zeros_like(similarity)
    rows = np.arange(m)

    previous = None
    stable_rounds = 0
    for _ in range(max_iter):
        # --- update r ---------------------------------------------------
        # For each row we need max_{k' != k}(a + s). That is the row maximum
        # everywhere except at the arg-max column, where it is the runner-up.
        a_s = a + similarity
        best = np.argmax(a_s, axis=1)
        best_value = a_s[rows, best]
        a_s[rows, best] = -np.inf
        runner_up = np.max(a_s, axis=1)
        competitor = np.repeat(best_value[:, np.newaxis], m, axis=1)
        competitor[rows, best] = runner_up
        r = lamda * r + (1 - lamda) * (similarity - competitor)

        # --- update a ---------------------------------------------------
        # support[i', k] = max(0, r(i', k)) off the diagonal and the raw
        # r(k, k) on it, so one column sum minus the entry's own term gives
        # both the i != k and the i == k formulas.
        support = np.maximum(r, 0)
        np.fill_diagonal(support, np.diagonal(r))
        a_new = support.sum(axis=0)[np.newaxis, :] - support
        self_availability = np.diagonal(a_new).copy()
        a_new = np.minimum(a_new, 0)
        # a(k, k) is not clipped at zero.
        np.fill_diagonal(a_new, self_availability)
        a = lamda * a + (1 - lamda) * a_new

        # Stop once every sample keeps the same exemplar.
        current = np.argmax(r + a, axis=1)
        if previous is not None and np.array_equal(current, previous):
            stable_rounds += 1
            if stable_rounds >= convergence_iter:
                break
        else:
            stable_rounds = 0
        previous = current
    else:
        warnings.warn(
            "affinityPropagation did not converge within max_iter iterations",
            RuntimeWarning,
        )
    print('r', r)
    print('a', a)
    return r + a


def computeCluster(fitable, data):
    """Group samples by the column index that maximises their score row.

    Args:
        fitable: sequence of score rows, one per sample (here the ``r + a``
            matrix returned by ``affinityPropagation``).
        data: indexable collection of samples aligned with ``fitable``.

    Returns:
        A dict that maps each arg-max index to the list of samples (converted
        to tuples) whose row peaks at that index, in order of first
        appearance.
    """
    clusters = {}
    for idx, scores in enumerate(fitable):
        exemplar = np.argmax(scores, axis=-1)
        clusters.setdefault(exemplar, []).append(tuple(data[idx]))
    return clusters


def plotClusters(clusters, title):
    """Scatter-plot every cluster in its own random colour and show the figure.

    Args:
        clusters: mapping from a cluster key to a list of points; the first
            two coordinates of each point are plotted.
        title: title of the figure.
    """
    plt.figure(figsize=(8, 5), dpi=80)
    axes = plt.subplot(111)

    def channel():
        # One random colour channel value drawn from np.random.randint(0, 255).
        return np.random.randint(0, 255)

    # One random hex colour per cluster.
    palette = ['#%02X%02X%02X' % (channel(), channel(), channel())
               for _ in range(len(clusters))]

    for colour, key in zip(palette, clusters):
        for point in clusters[key]:
            axes.scatter(point[0], point[1], s=20, c=colour)
    plt.title(title)
    plt.show()


# Demo pipeline: build the similarity matrix for X, run affinity propagation
# with damping 0.25, group the samples by their arg-max column, then report
# the number of clusters and plot them.
similarity = calc_similarity(X)
fitable = affinityPropagation(similarity, lamda=0.25)
clusters = computeCluster(fitable, X)
print(len(clusters))
plotClusters(clusters, "clusters by affinity propagation")

结果如下图：