西瓜书k-means聚类算法(python)

目录

 

算法原理:

算法流程:

编程实现:


算法原理:

算法流程:

编程实现:

import numpy as np
import matplotlib.pyplot as plt
from numpy import *  # 导入numpy的库函数
from scipy.spatial.distance import pdist


# 读取txt文件
def load_data(filename):
    """Load a tab-separated numeric text file into a NumPy array.

    Args:
        filename: Path (or file-like object) handed straight to
            ``np.loadtxt``.

    Returns:
        The array produced by ``np.loadtxt`` with ``"\\t"`` as the column
        delimiter.
    """
    samples = np.loadtxt(filename, delimiter="\t")
    return samples


# 欧氏距离
def dist_Eclud(vecA, vecB):
    """Return the Euclidean (L2) distance between two points.

    Every coordinate contributes to the result, so points of any dimension
    are supported (not just the first two components).

    Args:
        vecA: First point, a 1-D sequence or array of numbers.
        vecB: Second point, with the same length as ``vecA``.

    Returns:
        The distance as a NumPy floating-point scalar.
    """
    # asarray lets plain lists/tuples be subtracted element-wise as well.
    diff = np.asarray(vecA) - np.asarray(vecB)
    return np.sqrt(np.sum(np.square(diff)))


# 曼哈顿距离
def distManhattan(vecA, vecB):
    """Return the Manhattan (L1) distance between two points.

    Args:
        vecA: First point; must support element-wise subtraction with
            ``vecB`` (e.g. a NumPy array).
        vecB: Second point.

    Returns:
        The sum of the absolute coordinate differences.
    """
    abs_diff = np.abs(vecA - vecB)
    return abs_diff.sum()


# 余弦距离
def distCosine(vecA, vecB):
    """Return the cosine distance between two points.

    The two vectors are stacked into a 2-row matrix and passed to
    ``scipy.spatial.distance.pdist`` with the ``'cosine'`` metric.

    Args:
        vecA: First point (1-D array-like).
        vecB: Second point, with the same length as ``vecA``.

    Returns:
        The condensed distance array from ``pdist``. For two rows this is
        an array with a single element, not a bare scalar.
    """
    pair = np.vstack([vecA, vecB])
    return pdist(pair, 'cosine')


# 切比雪夫距离
def distChebyshev(vecA, vecB):
    """Return the Chebyshev (L-infinity) distance between two points.

    Args:
        vecA: First point; must support element-wise subtraction with
            ``vecB`` (e.g. a NumPy array).
        vecB: Second point.

    Returns:
        The largest absolute coordinate difference.
    """
    abs_diff = np.abs(vecA - vecB)
    return abs_diff.max()


# 初始化质心,得到k个随机质心
def initialize_vec(sample_set, k):
    """Pick ``k`` random samples to serve as the initial centroids.

    The returned centroids have as many columns as ``sample_set`` (the
    width is no longer fixed at 2). Rows are drawn without replacement
    whenever enough samples exist, so one sample cannot seed two centroids.

    Args:
        sample_set: 2-D array-like of shape ``(m, n)``, one sample per row.
        k: Number of centroids to return.

    Returns:
        A new float array of shape ``(k, n)`` holding copies of the chosen
        rows.

    Raises:
        ValueError: If ``sample_set`` is not 2-D, or it has no rows while
            ``k`` is positive.
    """
    samples = np.asarray(sample_set, dtype=float)
    m, _ = samples.shape
    # Distinct rows avoid identical starting centroids, which would leave
    # one cluster empty. Fall back to sampling with replacement only when
    # more centroids are requested than there are samples.
    chosen = np.random.choice(m, size=k, replace=k > m)
    # Fancy indexing copies, so callers may mutate the result freely.
    return samples[chosen]


# 初始化簇类,得到k个空集簇类
def initialize_class(C, k):
    """Reset clusters ``0 .. k-1`` of ``C`` to fresh empty lists.

    Args:
        C: Mutable container supporting item assignment by integer key
            (the script passes a dict). It is modified in place.
        k: Number of cluster slots to (re)initialize.

    Returns:
        The same object ``C`` that was passed in.
    """
    for cluster_id in range(k):
        # A separate list per slot so clusters never share storage.
        C[cluster_id] = list()
    return C


# 损失函数计算
def loss(k, A, initialize_mat):
    """Return the summed point-to-centroid distance divided by ``k``.

    Args:
        k: Number of clusters.
        A: Container indexed by ``0 .. k-1``; entry ``i`` holds the points
            assigned to cluster ``i``.
        initialize_mat: Container indexed by ``0 .. k-1`` holding each
            cluster's centroid.

    Returns:
        The total of ``dist_Eclud(centroid, point)`` over every point in
        every cluster, divided by the number of clusters ``k``.
    """
    total = 0
    for cluster_id in range(k):
        centroid = initialize_mat[cluster_id]
        for point in A[cluster_id]:
            total += dist_Eclud(centroid, point)
    return total / k


# 绘制图像
def plot_img(k, A, initialize_mat, mark, mark1):
    """Plot each cluster's points and its centroid, then show the figure.

    Args:
        k: Number of clusters to draw.
        A: Container indexed by ``0 .. k-1``; entry ``i`` holds the points
            of cluster ``i``. Components ``[0]`` and ``[1]`` of each point
            are used as the x and y coordinates.
        initialize_mat: Container indexed by ``0 .. k-1`` holding each
            cluster's centroid (components ``[0]`` and ``[1]`` are drawn).
        mark: Matplotlib format strings for the points of each cluster.
        mark1: Matplotlib format strings for the centroid of each cluster.

    The format-string lists are cycled through, so ``k`` may exceed their
    length without raising ``IndexError``.
    """
    for num in range(k):
        # Wrap around so more clusters than styles reuses earlier styles.
        point_fmt = mark[num % len(mark)]
        centre_fmt = mark1[num % len(mark1)]
        for point in A[num]:
            plt.plot(point[0], point[1], point_fmt)
        plt.plot(initialize_mat[num][0], initialize_mat[num][1], centre_fmt)
    plt.show()


# k均值聚类
def kmean(k, data=None, dist_func=None, max_iter=400, init_centroids=None):
    """Cluster samples into ``k`` groups with the k-means algorithm.

    Each iteration assigns every sample to its nearest centroid and then
    moves each centroid to the mean of its assigned samples. Iteration
    stops as soon as no centroid changes, or after ``max_iter`` rounds.

    Args:
        k: Number of clusters.
        data: 2-D array-like of samples, one per row. Defaults to the
            module-level ``test_set``.
        dist_func: Callable ``dist_func(sample, centroid)`` returning the
            distance between two points. Defaults to ``distChebyshev``.
        max_iter: Upper bound on the number of iterations.
        init_centroids: Optional array-like of ``k`` starting centroids.
            When omitted they are drawn with ``initialize_vec``.

    Returns:
        A tuple ``(clusters, centroids)``: ``clusters`` is a dict mapping
        each cluster index ``0 .. k-1`` to the list of samples assigned to
        it in the final iteration, and ``centroids`` is a float array with
        one row per cluster.
    """
    samples = np.asarray(test_set if data is None else data, dtype=float)
    if dist_func is None:
        dist_func = distChebyshev
    if init_centroids is None:
        centroids = initialize_vec(samples, k)
    else:
        # Copy so the caller's array is never modified.
        centroids = np.array(init_centroids, dtype=float)

    clusters = {i: [] for i in range(k)}
    for _ in range(max_iter):
        # Assignment step: start from empty clusters every round.
        clusters = {i: [] for i in range(k)}
        for sample in samples:
            distances = [dist_func(sample, centre) for centre in centroids]
            # argmin returns the first minimum, so ties go to the lowest index.
            clusters[int(np.argmin(distances))].append(sample)

        # Update step.
        new_centroids = centroids.copy()
        for i, members in clusters.items():
            # An empty cluster keeps its previous centroid; averaging zero
            # samples would turn it into NaN.
            if members:
                new_centroids[i] = np.mean(members, axis=0)

        # Unchanged centroids mean further rounds would repeat this result.
        if np.array_equal(new_centroids, centroids):
            break
        centroids = new_centroids
    return clusters, centroids


# Number of clusters to form.
k = 4

# Samples to cluster. The name ``test_set`` must stay at module level
# because kmean() reads it as a global.
test_set = load_data("testSet.txt")

# Matplotlib format strings: ``mark`` styles the points of each cluster,
# ``mark1`` styles the centroid of each cluster.
mark = [
    'or', 'ob', 'og', 'ok', '^r',
    '+r', 'sr', 'dr', '<r', 'pr',
]
mark1 = [
    '^b', '+b', 'sb',
    'db', '<b', 'pb',
]

# Run the clustering and report the resulting centroids.
classed_mat, centre_point = kmean(k)
print("center_point:", centre_point)

# Report the loss value computed by loss() for this clustering.
losses = loss(k, classed_mat, centre_point)
print("mean_loss:", losses)

# Draw the clusters and their centroids.
plot_img(k, classed_mat, centre_point, mark, mark1)

 结果:

 

  • 0
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值