k均值聚类的python代码_k-均值聚类Python代码实现

这里给出 k-均值聚类的两种实现方式,代码主要来自网络:

# reference: https://mubaris.com/2017/10/01/kmeans-clustering-in-python/

from copy import deepcopy

import numpy as np

import pandas as pd

from matplotlib import pyplot as plt

#plt.rcParams['figure.figsize'] = (16, 9)

#plt.style.use('ggplot')

# Load the sample data set; only its V1 and V2 columns are used below.
data = pd.read_csv('E:/GitCode/NN_Test/data/database/xclara.csv')

# Preview of the first rows (only displayed in an interactive session).
data.head()

# Pull the two feature columns out as arrays and pair them up row by row,
# giving one (V1, V2) sample per row of X.
f1, f2 = (data[column].values for column in ('V1', 'V2'))
X = np.column_stack((f1, f2))

# Optional: raw scatter plot of the samples.
#plt.scatter(f1, f2, c='black', s=7)

# Euclidean distance calculator

def dist(a, b, ax=1):
    """Return the Euclidean distance between ``a`` and ``b``.

    Args:
        a: Array-like of coordinates.
        b: Array-like that broadcasts against ``a``.
        ax: Axis handed to ``np.linalg.norm``. The default ``1`` produces one
            distance per row of the (broadcast) difference; ``None``
            collapses the whole difference into a single scalar norm.

    Returns:
        The 2-norm of ``a - b`` taken along ``ax``.
    """
    # np.asarray lets callers pass plain lists as well as ndarrays.
    return np.linalg.norm(np.asarray(a) - np.asarray(b), axis=ax)

# ---------------------------------------------------------------------------
# Plain NumPy k-means (Lloyd's algorithm) on the samples in X, followed by a
# scatter plot of the resulting clusters and their centroids.
# ---------------------------------------------------------------------------

# Number of clusters
k = 3

# Safety cap on the number of assignment/update rounds so the loop always
# terminates, even if the centroids never settle exactly.
max_iter = 300

# Random integer starting coordinates for the centroids. randint needs an
# integer upper bound that is strictly greater than the lower bound.
upper = max(int(np.max(X)) - 20, 1)

# X coordinates of random centroids
C_x = np.random.randint(0, upper, size=k)

# Y coordinates of random centroids
C_y = np.random.randint(0, upper, size=k)

C = np.column_stack((C_x, C_y)).astype(np.float32)

#print(C)

# Plotting along with the Centroids
#plt.scatter(f1, f2, c='#050505', s=7)
#plt.scatter(C_x, C_y, marker='*', s=200, c='g')

# To store the value of centroids when it updates
C_old = np.zeros(C.shape)

# Cluster labels (0, 1, 2, ...), one per sample
clusters = np.zeros(len(X), dtype=int)

# Error func. - distance between new centroids and old centroids
error = dist(C, C_old, None)

# Always run at least one round (the initial error can be 0 by chance when
# every random centroid lands on the origin) and stop once the centroids no
# longer move.
for _ in range(max_iter):
    # Assign each sample to its closest centroid: build the
    # (n_samples, k) distance matrix in one broadcasted call.
    clusters = np.argmin(dist(X[:, np.newaxis, :], C[np.newaxis, :, :], 2), axis=1)

    # Remember the old centroid values
    C_old = C.copy()

    # Move every centroid to the mean of its members. A centroid that
    # attracted no samples keeps its position; averaging an empty set would
    # yield NaN and the error would then never reach zero.
    for i in range(k):
        members = X[clusters == i]
        if len(members):
            C[i] = members.mean(axis=0)

    error = dist(C, C_old, None)
    if error == 0:
        break

colors = ['r', 'g', 'b', 'y', 'c', 'm']

fig, ax = plt.subplots()

for i in range(k):
    # Boolean masking keeps a (0, 2) shape for empty clusters, so the column
    # slices below stay valid.
    members = X[clusters == i]
    # Cycle through the palette so k larger than the palette still plots.
    ax.scatter(members[:, 0], members[:, 1], s=7, c=colors[i % len(colors)])

ax.scatter(C[:, 0], C[:, 1], marker='*', s=200, c='#050505')

plt.show()

执行结果如下:

# reference: https://docs.opencv.org/3.0-beta/doc/py_tutorials/py_ml/py_kmeans/py_kmeans_opencv/py_kmeans_opencv.html

import numpy as np

import cv2

from matplotlib import pyplot as plt

# Two synthetic groups of 25 two-dimensional integer samples each, drawn
# from the ranges [25, 50) and [60, 85).
X = np.random.randint(25, 50, (25, 2))
Y = np.random.randint(60, 85, (25, 2))

# Stack both groups into one sample matrix and convert it to float32
# before handing it to cv2.kmeans.
Z = np.float32(np.vstack((X, Y)))

# Termination criteria: (type flags, max iterations, epsilon).
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)

# Cluster into 2 groups, 10 attempts, random initial centers.
ret, label, center = cv2.kmeans(
    Z, 2, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS
)

# Split the samples by label; ravel() flattens the label column so it can
# be used as a boolean row mask.
flat_labels = label.ravel()
A = Z[flat_labels == 0]
B = Z[flat_labels == 1]

# Plot both clusters and the cluster centers.
plt.scatter(A[:, 0], A[:, 1])
plt.scatter(B[:, 0], B[:, 1], c='r')
plt.scatter(center[:, 0], center[:, 1], s=80, c='y', marker='s')
plt.xlabel('Height')
plt.ylabel('Weight')
plt.show()

执行结果如下:

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
# Classic k-means clustering and fuzzy k-means (fuzzy c-means) clustering in
# Python.
#
# Note: both implementations start from a random initialisation, so results
# may differ from run to run unless the NumPy random seed is fixed.

import numpy as np


class KMeans:
    """Classic (hard-assignment) k-means clustering.

    Args:
        k: Number of clusters.
        max_iter: Upper bound on the number of assignment/update rounds.

    Attributes set by ``fit``:
        centroids: List of ``k`` centroid arrays.
    """

    def __init__(self, k=2, max_iter=100):
        self.k = k
        self.max_iter = max_iter

    def fit(self, X):
        """Estimate the centroids from the samples in ``X``.

        Args:
            X: Array-like with one sample per row.

        Raises:
            ValueError: If ``X`` contains no samples.
        """
        X = np.asarray(X, dtype=float)
        if len(X) == 0:
            raise ValueError("X must contain at least one sample")

        # Seed the centroids with distinct samples whenever there are enough
        # of them; duplicate seeds would leave clusters empty from the start.
        seeds = np.random.choice(len(X), size=self.k, replace=self.k > len(X))
        self.centroids = [X[i].copy() for i in seeds]

        for _ in range(self.max_iter):
            labels = self._nearest(X)
            moved = False
            for j in range(self.k):
                members = X[labels == j]
                if len(members) == 0:
                    # Keep the previous centroid: the mean of an empty
                    # cluster is NaN and would poison every later distance.
                    continue
                updated = members.mean(axis=0)
                if not np.array_equal(updated, self.centroids[j]):
                    moved = True
                self.centroids[j] = updated
            if not moved:
                # Converged: further rounds would repeat the same result.
                break

    def predict(self, X):
        """Return a list with the index of the nearest centroid per sample."""
        X = np.asarray(X, dtype=float)
        if len(X) == 0:
            return []
        return self._nearest(X).tolist()

    def _nearest(self, X):
        """Return the nearest-centroid index for every row of ``X``."""
        distances = np.stack(
            [
                np.linalg.norm((X - c).reshape(len(X), -1), axis=1)
                for c in self.centroids
            ],
            axis=1,
        )
        return np.argmin(distances, axis=1)


class FuzzyKMeans:
    """Fuzzy k-means (fuzzy c-means) clustering.

    Args:
        k: Number of clusters.
        m: Fuzzifier; must be greater than 1.
        max_iter: Number of centroid/membership update rounds.

    Attributes set by ``fit``:
        U: ``(n_samples, k)`` membership matrix whose rows sum to 1.
        centroids: List of ``k`` centroid arrays.

    Raises:
        ValueError: If ``m`` is not greater than 1 (the membership exponent
            ``-2 / (m - 1)`` is undefined for ``m == 1``).
    """

    def __init__(self, k=2, m=2, max_iter=100):
        if m <= 1:
            raise ValueError("m must be greater than 1")
        self.k = k
        self.m = m
        self.max_iter = max_iter

    def fit(self, X):
        """Estimate memberships and centroids from the samples in ``X``.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
        """
        X = np.asarray(X, dtype=float)

        # Random initial memberships, normalised so each row sums to 1.
        self.U = np.random.rand(len(X), self.k)
        self.U = self.U / self.U.sum(axis=1, keepdims=True)

        # Computed up front so that centroids exist even when max_iter is 0.
        self.centroids = self._weighted_centroids(X)
        for round_no in range(self.max_iter):
            if round_no:
                self.centroids = self._weighted_centroids(X)
            self.U = self._memberships(X)

    def predict(self, X):
        """Return an array with the index of the nearest centroid per sample."""
        X = np.asarray(X, dtype=float)
        return np.argmin(self._distances(X), axis=1)

    def _weighted_centroids(self, X):
        """Return the membership-weighted mean of ``X`` for every cluster."""
        weights = self.U ** self.m
        centers = (weights.T @ X) / weights.sum(axis=0)[:, None]
        return list(centers)

    def _distances(self, X):
        """Return the ``(n_samples, k)`` sample-to-centroid distance matrix."""
        return np.stack(
            [np.linalg.norm(X - c, axis=1) for c in self.centroids], axis=1
        )

    def _memberships(self, X):
        """Return the normalised membership matrix for the current centroids."""
        # Clamp distances away from zero: a sample sitting exactly on a
        # centroid would otherwise give inf and, after normalising, NaN.
        distances = np.fmax(self._distances(X), np.finfo(float).eps)
        raw = distances ** (-2.0 / (self.m - 1))
        return raw / raw.sum(axis=1, keepdims=True)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值