Python:手工代码之K-Means

#DeniuHe手工代码之KMeans
import numpy as np
import pandas as pd
from sklearn import datasets
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist,squareform

def randomCenters(dataSet, k):
    """Pick k distinct rows of dataSet as the initial cluster centres.

    Parameters
    ----------
    dataSet : numpy.ndarray of shape (n_row, n_col)
        Samples, one per row.
    k : int
        Number of centres to draw; must not exceed n_row.

    Returns
    -------
    numpy.ndarray of shape (k, n_col)
        A new float array holding copies of the selected rows.

    Raises
    ------
    ValueError
        Raised by np.random.choice when k is larger than n_row.
    """
    n_row, n_col = dataSet.shape
    # Sample WITHOUT replacement: the default (replace=True) can return the
    # same row twice, which yields duplicate centres and an empty cluster.
    selected_index = np.random.choice(n_row, k, replace=False)
    centers = np.zeros((k, n_col))
    centers[:] = dataSet[selected_index]
    return centers

def euclideanDist(A, B):
    """Return the Euclidean distance between two equally shaped vectors.

    Both inputs are converted to float arrays first, so plain sequences are
    accepted and unsigned-integer inputs cannot wrap around on subtraction.
    The squared differences are summed along the first axis, so 1-D inputs
    give a scalar.
    """
    diff = np.asarray(A, dtype=float) - np.asarray(B, dtype=float)
    # np.sum reduces in native code; the builtin sum() would iterate the
    # array element by element in Python.
    return np.sqrt(np.sum(diff ** 2, axis=0))

def K_Means(dataSet, k):
    """Cluster the rows of dataSet into k groups with Lloyd's K-Means.

    Parameters
    ----------
    dataSet : numpy.ndarray of shape (n, m)
        Samples, one per row.
    k : int
        Number of clusters.

    Returns
    -------
    centers : numpy.ndarray of shape (k, m)
        Final cluster centres.
    distMatrix : numpy.ndarray of shape (n, 2)
        Column 0 holds the index of the centre each sample is assigned to,
        column 1 the distance from the sample to that centre.
    """
    n = dataSet.shape[0]
    centers = randomCenters(dataSet, k)
    distMatrix = np.zeros((n, 2))
    # Start from an impossible label: with the zero initialisation a first
    # pass that assigns every sample to cluster 0 would look like "nothing
    # changed" and stop before any centre was updated.
    distMatrix[:, 0] = -1
    centerChanged = True
    while centerChanged:
        # Distances from every sample to every centre, shape (n, k).
        diff = dataSet[:, np.newaxis, :] - centers[np.newaxis, :, :]
        dists = np.sqrt(np.sum(diff ** 2, axis=2))
        # argmin keeps the first minimum, i.e. the lowest centre index on ties.
        labels = np.argmin(dists, axis=1)
        # Keep iterating only while at least one assignment changed.
        centerChanged = bool(np.any(labels != distMatrix[:, 0]))
        distMatrix[:, 0] = labels
        distMatrix[:, 1] = dists[np.arange(n), labels]
        if centerChanged:
            # Move every centre to the mean of its members.
            for j in range(k):
                cluster = dataSet[labels == j]
                # The mean of an empty cluster is NaN (with a RuntimeWarning);
                # keep the previous centre in that case.
                if len(cluster) > 0:
                    centers[j] = np.mean(cluster, axis=0)
    return centers, distMatrix

if __name__ == "__main__":
    # Demo: cluster the iris samples using feature columns 2 and 3 only.
    iris = datasets.load_iris()
    X = iris.data
    DataSet = X[:, 2:4]
    RandomCents, DistMat = K_Means(DataSet, 3)
    # One scatter call per cluster (instead of one per sample) so that each
    # cluster carries a label for plt.legend to show.
    styles = (('red', 'o'), ('green', '*'), ('blue', '+'))
    for j, (color, marker) in enumerate(styles):
        members = DataSet[DistMat[:, 0] == j]
        plt.scatter(members[:, 0], members[:, 1], c=color, marker=marker,
                    label='cluster %d' % j)
    # Column 2 (petal length) is plotted on x and column 3 (petal width) on y;
    # the original labels were swapped.
    plt.xlabel('petal length')
    plt.ylabel('petal width')
    plt.legend(loc=2)
    plt.show()

该版本时不时会抛异常。

 

版本2:无异常版

import numpy as np
import pandas as pd
from sklearn import datasets as DS
import matplotlib.pyplot as plt

def euclideanDist(A, B):
    """Return the Euclidean distance between two equally shaped vectors.

    Both inputs are converted to float arrays first, so plain sequences are
    accepted and unsigned-integer inputs cannot wrap around on subtraction.
    The squared differences are summed along the first axis, so 1-D inputs
    give a scalar.
    """
    diff = np.asarray(A, dtype=float) - np.asarray(B, dtype=float)
    # np.sum reduces in native code; the builtin sum() would iterate the
    # array element by element in Python.
    return np.sqrt(np.sum(diff ** 2, axis=0))

def RandomCenters(dataSet, k):
    """Pick k distinct rows of dataSet as the initial cluster centres.

    Parameters
    ----------
    dataSet : numpy.ndarray
        Samples, one per row.
    k : int
        Number of centres to draw; must not exceed the number of rows.

    Returns
    -------
    numpy.ndarray
        A new float array holding copies of the k selected rows.

    Raises
    ------
    ValueError
        Raised by np.random.choice when k is larger than the number of rows.
    """
    n = dataSet.shape[0]
    # replace=False guarantees k different row indices.
    centerIndex = np.random.choice(n, size=k, replace=False)
    # Force a float copy: with an integer dataSet the centres would otherwise
    # be an integer array, and means assigned into it later would be truncated.
    centers = np.array(dataSet[centerIndex], dtype=float)
    return centers
def KMeans(dataSet, k):
    """Cluster the rows of dataSet into k groups with Lloyd's K-Means.

    Parameters
    ----------
    dataSet : numpy.ndarray of shape (n, m)
        Samples, one per row.
    k : int
        Number of clusters.

    Returns
    -------
    Centers : numpy.ndarray of shape (k, m)
        Final cluster centres (float).
    DistMatrix : numpy.ndarray of shape (n, 2)
        Column 0 holds the index of the centre each sample is assigned to,
        column 1 the distance from the sample to that centre.
    """
    # Work on a float copy so that assigning means never truncates, whatever
    # dtype the initial centres come back with.
    Centers = np.array(RandomCenters(dataSet, k), dtype=float)
    n = dataSet.shape[0]
    DistMatrix = np.zeros((n, 2))
    # Start from an impossible label: with the zero initialisation a first
    # pass that assigns every sample to cluster 0 would look like "nothing
    # changed" and stop before any centre was updated.
    DistMatrix[:, 0] = -1
    centerChanged = True
    while centerChanged:
        # Distances from every sample to every centre, shape (n, k).
        diff = dataSet[:, np.newaxis, :] - Centers[np.newaxis, :, :]
        dists = np.sqrt(np.sum(diff ** 2, axis=2))
        # argmin keeps the first minimum, i.e. the lowest centre index on ties.
        labels = np.argmin(dists, axis=1)
        centerChanged = bool(np.any(labels != DistMatrix[:, 0]))
        DistMatrix[:, 0] = labels
        DistMatrix[:, 1] = dists[np.arange(n), labels]
        # If any assignment changed, the centres have to be recomputed.
        if centerChanged:
            for i in range(k):
                members = dataSet[labels == i]  # samples of the same cluster
                # The mean of an empty cluster is NaN (with a RuntimeWarning);
                # keep the previous centre in that case.
                if len(members) > 0:
                    Centers[i] = np.mean(members, axis=0)
    return Centers, DistMatrix

if __name__ == "__main__":
    # Demo: load a header-less CSV and cluster on its first two columns.
    Data = np.array(pd.read_csv(r'E:\data set\clusterData\bolbs_1.csv',header=None))
    X = Data[:, :2]
    k = 24
    Center, DistMat = KMeans(X, k)
    # Show which cluster indices actually received samples.
    print(set(DistMat[:, 0]))
    # A single scatter call colours every sample by its cluster index; the
    # original repeated this identical full plot once per sample.
    plt.scatter(X[:, 0], X[:, 1], c=DistMat[:, 0])
    plt.show()

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

DeniuHe

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值