K-Means Python实现

转载自http://nathanlvzs.github.io/blog/Clustering-KMeans.html

实现代码基本参考《K-Means聚类及其Python实现》,并在其中加入了一些对距离矩阵计算方式的理解。把源码自己研究一遍,可以逐渐掌握如何用 Python(NumPy)进行矩阵运算。其中用到的函数包括:

import numpy as np
np.sum()   #axis=0,1
np.outer()
np.dot()
np.mean()
np.square()


# -*- coding: utf-8 -*-
"""
Created on Mon Sep 19 22:23:34 2016

@author: soso
"""

import numpy as np
import matplotlib.pyplot as plt
from numpy import array
import matplotlib.cm as cm
import matplotlib
#%matplotlib inline

# Toy data set: three 2-D Gaussian blobs with identity covariance.
# The samples are random and unseeded, so every run produces different data.
mean1 = (0, 8)
mean2 = (5, 5)
mean3 = (1, -1)

cov = [[1, 0], [0, 1]]

x1 = np.random.multivariate_normal(mean1, cov, 20)  # shape: 20*2
# Tiny hand-made alternative for stepping through the maths by hand:
#x1=array([[1,1],[2,2]])
y1 = [0] * 20  # ground-truth label of every sample in x1

x2 = np.random.multivariate_normal(mean2, cov, 30)  # shape: 30*2
#x2=array([[1,0],[2,0],[3,0]])
y2 = [1] * 30

x3 = np.random.multivariate_normal(mean3, cov, 20)  # shape: 20*2
#x3=array([[0,1],[0,2]])
y3 = [2] * 20

# Stack the blobs row-wise into one (70, 2) sample matrix.
x = np.concatenate((x1, x2, x3), axis=0)
# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
print("x:")
print(x)
#print(x.shape)
y = y1 + y2 + y3

# One colour per ground-truth label, taken from the rainbow colormap.
#colors=array([[1,0,0],[0,1,0],[0,0,1]])
colors = cm.rainbow(np.linspace(0, 1, 3))
color_label = [colors[label] for label in y]
# Uncomment to look at the ground-truth labelling before clustering:
#plt.scatter(x[:,0],x[:,1],c=color_label)
#plt.show()
#print(cm.rainbow(np.linspace(0, 1, 3)))
class kmeansclustering:
    """Plain K-Means (Lloyd's algorithm) on an (N, d) sample matrix.

    Attributes:
        data: (N, d) float array of samples.
        k: number of clusters.
        maiter: maximum number of assignment/update iterations.
        epsilon: stop once the cost improves by less than this amount.
        N: number of samples.
        colors: k RGBA colours (rainbow colormap) used by draw().
        classess: length-N int array, current cluster index of each sample.
        center: (k, d) float array of current centroids.
    """

    def __init__(self, data, k, maiter=100, epsilon=1e-12):
        """Store the data and set up deterministic initial centroids.

        Args:
            data: 2-D array-like of shape (N, d).
            k: number of clusters.
            maiter: iteration cap for kmeans().
            epsilon: minimum cost decrease required to keep iterating.

        Raises:
            ValueError: if data is not 2-dimensional.
        """
        self.data = np.asarray(data, dtype=float)
        if self.data.ndim != 2:
            raise ValueError("data must be a 2-D array of shape (N, d)")
        self.k = k
        self.maiter = maiter
        self.epsilon = epsilon
        self.N = len(self.data)
        self.colors = cm.rainbow(np.linspace(0, 1, k))
        self.classess = np.zeros(self.N, dtype=int)
        # Random initialisation from the samples would be:
        #self.center = self.data[np.random.choice(self.N, self.k, replace=False), :]
        # Deterministic initialisation on the diagonal: centroid i is
        # (i, i, ..., i).  For k=3 and 2-D data this is (0,0), (1,1), (2,2).
        # The array must be float: assigning a mean into an integer array
        # truncates it.
        dim = self.data.shape[1]
        self.center = np.tile(
            np.arange(self.k, dtype=float)[:, np.newaxis], (1, dim))

    def getdismat(self):
        """Return the (N, k) matrix of squared Euclidean distances.

        Uses ||x - c||^2 = ||x||^2 - 2*x.c + ||c||^2 so that everything is
        computed with matrix operations instead of Python loops.
        """
        # Per-sample sum of squared coordinates, shape (N,).
        data_sq_sum = np.sum(self.data * self.data, axis=1)
        # Per-centroid sum of squared coordinates, shape (k,).
        cent_sq_sum = np.sum(self.center * self.center, axis=1)
        # Inner products of every sample with every centroid, shape (N, k).
        # With samples [xi1, xi2] and centroids [cj1, cj2] the result is
        #   [x11*c11+x12*c12, x11*c21+x12*c22, ..., x11*ck1+x12*ck2]
        #   [x21*c11+x22*c12, x21*c21+x22*c22, ..., x21*ck1+x22*ck2]
        #   ...
        #   [xn1*c11+xn2*c12, xn1*c21+xn2*c22, ..., xn1*ck1+xn2*ck2]
        dot_data_center = np.dot(self.data, self.center.T)
        # Broadcasting expands the (N, 1) and (1, k) terms to (N, k).
        return (data_sq_sum[:, np.newaxis]
                - 2 * dot_data_center
                + cent_sq_sum[np.newaxis, :])

    def cal_cost(self):
        """Return the sum of squared distances of samples to their centroid."""
        # Fancy indexing picks the assigned centroid for every sample.
        residual = self.data - self.center[self.classess]
        return np.sum(np.square(residual))

    def kmeans(self, plot=True):
        """Run the assignment/update loop until convergence or maiter.

        Args:
            plot: when true, call draw() before the first iteration and
                after every iteration that does not trigger the stop test.

        Updates self.classess and self.center in place and prints a message
        when the loop stops before reaching maiter.
        """
        # Guard against a caller having replaced center with an int array.
        self.center = np.asarray(self.center, dtype=float)
        numiter = 0
        pre_cost = self.cal_cost()
        if plot:
            self.draw()
        while numiter < self.maiter:
            # Assignment step: nearest centroid for every sample.
            distmat = self.getdismat()
            self.classess = np.argmin(distmat, axis=1)
            # Update step: move each centroid to the mean of its members.
            for c in range(self.k):
                members = self.data[self.classess == c]
                # An empty cluster has no mean (it would be NaN); keep its
                # previous centroid so the cost stays finite.
                if members.shape[0] > 0:
                    self.center[c] = members.mean(axis=0)
            now_cost = self.cal_cost()
            if pre_cost - now_cost < self.epsilon:
                print("break befor maxiter....")
                break
            if plot:
                self.draw()
            pre_cost = now_cost
            numiter += 1

    def draw(self, plotcen=True):
        """Scatter-plot the samples coloured by their current cluster.

        Args:
            plotcen: when true, also plot the centroids as large circles.
        """
        plt.figure(figsize=(10, 10), facecolor='white')
        colors_data = [self.colors[c] for c in self.classess]
        plt.scatter(self.data[:, 0], self.data[:, 1], color=colors_data,
                    marker=',', alpha=0.9, s=80)
        plt.axis('equal')
        if plotcen:
            plt.scatter(self.center[:, 0], self.center[:, 1], marker='o',
                        color=self.colors, s=200)
        plt.show()

# Demo run on the toy data: print the initial squared-distance matrix and the
# initial cost, then iterate (kmeans() plots every step by default).
# Single-argument print(...) is valid on both Python 2 and Python 3.
kmeans = kmeansclustering(data=x, k=3)
print(kmeans.getdismat())
print(kmeans.cal_cost())
kmeans.kmeans()

参考:

K-means聚类算法

深入浅出k-means

K-Means聚类及其Python实现

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值