Python 实现 K-means 与 K-medoids 聚类

本文给出了基于 NumPy 手写实现的 K-means 与 K-medoids 聚类算法的 Python 代码(未使用 Scikit-learn):从 .mat 文件读取数据、随机选取初始聚类中心、迭代更新中心,并用 Matplotlib 将聚类结果可视化,帮助读者理解这两种算法的工作原理。
摘要由CSDN通过智能技术生成

# -*- coding: utf-8 -*-
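"""
K-means and k-medoids clustering demo built on NumPy.

When run as a script, loads the matrix "X" from kmeans_data.mat, clusters
its rows and shows a scatter plot of the first two columns, coloured by
cluster.
"""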
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
import random
import math

# Shared cluster table: the clustering functions clear it and refill it on
# every iteration, mapping the index of the nearest center to the list of
# row indices of X assigned to that center.
maps = dict()

# Colour names used for the scatter plot, looked up by cluster number.
clr = [
    'blue',
    'green',
    'yellow',
    'red',
    'black',
    'cyan',
    'pink',
]

def calcCetroid(X, sub):
    """Return the centroid (column-wise mean) of the rows of X listed in sub.

    Parameters:
        X   -- 2-D array, one sample per row.
        sub -- 1-D sequence or array of integer row indices into X.

    Returns:
        1-D float array with one entry per column of X.

    Raises:
        ValueError -- if sub selects no rows; a mean over zero rows is
                      undefined and would otherwise surface as silent NaNs.
    """
    rows = np.asarray(sub)
    if rows.size == 0:
        raise ValueError("calcCetroid: 'sub' must select at least one row")
    # Fancy indexing gathers the selected rows in one step; the mean is then
    # taken down the rows so that one value per feature column remains.
    return np.asarray(X[rows, :], dtype=float).mean(axis=0)
    
def k_medoids(X,k):
    """
        Cluster the rows of X around k medoids and plot the result.

        Each iteration assigns every row to its nearest center (Euclidean
        distance), then replaces each center by the cluster member that lies
        closest to the cluster's centroid. Note that this is a
        centroid-nearest approximation of the medoid, not a minimisation of
        the summed distances to all members.

        Input:
            X---data point features, n-by-p matrix (p >= 2 for the plot)
            k-- the number of clusters; random.sample raises ValueError
                when k > n
        output:
            idx---cluster label of every row, length-n integer array
            ctrx-- cluster centers (rows of X), k-by-p matrix
            iter_ctrs---cluster centers after each iteration that was run,
                        iter-by-k-by-p 3D matrix

        Side effects: the module-level dict ``maps`` is cleared and refilled
        with label -> list of row indices, and a scatter plot of the first
        two columns is shown.
    """
    iterNums = 100
    n, p = X.shape

    # Start from k distinct, randomly chosen rows of X.
    ctrx = np.array(X[random.sample(range(n), k), :], dtype=float)
    iter_ctrs = np.zeros((iterNums, k, p))
    idx = np.zeros(n, dtype=int)
    done = 0

    for it in range(iterNums):
        # (k, n) matrix of Euclidean distances from every center to every row.
        diff = ctrx[:, np.newaxis, :] - X[np.newaxis, :, :]
        distance = np.sqrt((diff ** 2).sum(axis=2))
        idx = np.argmin(distance, axis=0)

        maps.clear()
        for row, label in enumerate(idx):
            maps.setdefault(int(label), []).append(row)

        previous = ctrx.copy()
        # Update by label, not by dict iteration order, so center j always
        # belongs to cluster j; a cluster that received no rows simply keeps
        # its previous center instead of collapsing to the origin.
        for label, members in maps.items():
            members = np.array(members)
            centroid = calcCetroid(X, members)
            dd = np.sqrt(((X[members, :] - centroid) ** 2).sum(axis=1))
            ctrx[label] = X[members[np.argmin(dd)], :]

        iter_ctrs[it] = ctrx
        done = it + 1
        # Unchanged centers reproduce the same assignment forever, so the
        # remaining iterations would be no-ops.
        if np.array_equal(previous, ctrx):
            break

    for label, members in maps.items():
        # Wrap around the palette so that k > len(clr) does not fail.
        plt.scatter(X[members,0], X[members,1], c=clr[label % len(clr)])
    plt.show()

    return idx, ctrx, iter_ctrs[:done]
    
def k_means(X,k):
    """
        Cluster the rows of X with k-means and plot the result.

        Each iteration assigns every row to its nearest center (Euclidean
        distance), then moves each center to the mean of the rows assigned
        to it. Iteration stops after 100 rounds or as soon as the centers no
        longer change.

        Input:
            X---data point features, n-by-p matrix (p >= 2 for the plot)
            k-- the number of clusters; random.sample raises ValueError
                when k > n
        output:
            idx---cluster label of every row, length-n integer array
            ctrx-- cluster centers, k-by-p matrix
            iter_ctrs---cluster centers after each iteration that was run,
                        iter-by-k-by-p 3D matrix

        Side effects: the module-level dict ``maps`` is cleared and refilled
        with label -> list of row indices, and a scatter plot of the first
        two columns is shown.
    """
    iterNums = 100
    n, p = X.shape

    # Start from k distinct, randomly chosen rows of X.
    ctrx = np.array(X[random.sample(range(n), k), :], dtype=float)
    iter_ctrs = np.zeros((iterNums, k, p))
    idx = np.zeros(n, dtype=int)
    done = 0

    for it in range(iterNums):
        # (k, n) matrix of Euclidean distances from every center to every row.
        diff = ctrx[:, np.newaxis, :] - X[np.newaxis, :, :]
        distance = np.sqrt((diff ** 2).sum(axis=2))
        idx = np.argmin(distance, axis=0)

        maps.clear()
        for row, label in enumerate(idx):
            maps.setdefault(int(label), []).append(row)

        previous = ctrx.copy()
        # Update by label, not by dict iteration order, so center j always
        # belongs to cluster j; a cluster that received no rows simply keeps
        # its previous center instead of collapsing to the origin.
        for label, members in maps.items():
            ctrx[label] = calcCetroid(X, np.array(members))

        iter_ctrs[it] = ctrx
        done = it + 1
        # Unchanged centers reproduce the same assignment forever, so the
        # remaining iterations would be no-ops.
        if np.array_equal(previous, ctrx):
            break

    for label, members in maps.items():
        # Wrap around the palette so that k > len(clr) does not fail.
        plt.scatter(X[members,0], X[members,1], c=clr[label % len(clr)])
    plt.show()

    return idx, ctrx, iter_ctrs[:done]
if __name__ == "__main__":
    # Demo run: read the sample matrix stored under the key "X" in the
    # MATLAB file and cluster its rows into five groups.
    X = sio.loadmat("kmeans_data.mat")["X"]
    k = 5
    # k_means(X, k) accepts the same arguments and can be called here instead.
    k_medoids(X, k)
        





















评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值