K-means 算法（Python代码）

最新推荐文章于 2024-07-19 20:50:48 发布

Chordx

最新推荐文章于 2024-07-19 20:50:48 发布

阅读量2.4k

点赞数 3

分类专栏：机器学习模型 | 文章标签：聚类、机器学习、python

本文链接：https://blog.csdn.net/qq_40241232/article/details/112259823

版权

机器学习模型专栏收录该内容

1 篇文章 0 订阅

订阅专栏

## Import the required libraries
import random
import numpy as np
import matplotlib.pyplot as plt

## Data acquisition (synthetic generation)
nn = 100  # number of samples drawn for each (nominal) class


def get_clusters():
    """Return three 2-D Gaussian blobs stacked row-wise.

    Each blob holds ``nn`` samples drawn with covariance ``0.1 * I`` around
    the means (0, 0), (1.25, 1.25) and (-1.25, 1.25), in that row order.
    Coordinates are rounded to four decimal places, so the result has shape
    ``(3 * nn, 2)``.
    """
    blob_means = ([0, 0], [1.25, 1.25], [-1.25, 1.25])
    shared_cov = [[0.1, 0], [0, 0.1]]

    # Draw the blobs in a fixed order so the global NumPy random stream is
    # consumed the same way on every call.
    blobs = [
        np.random.multivariate_normal(centre, shared_cov, nn)
        for centre in blob_means
    ]

    stacked = np.concatenate(blobs, axis=0)
    return np.round(stacked, 4)

data = get_clusters() # generate the sample set and keep it as a module-level array

## Data visualisation of the raw samples (disabled: the code below is wrapped in a string literal)
"""
def show_scatter(data):
	x,y = data.T
	plt.plot(x[:nn],y[:nn],'b+');
	plt.plot(x[nn:2*nn],y[nn:2*nn],'r+');
	plt.plot(x[2*nn:3*nn],y[2*nn:3*nn],'g+');
	plt.axis()
	plt.title("scatter")
	plt.xlabel("x")
	plt.ylabel("y")

show_scatter(data)
"""

## K-means algorithm
k = 4  # number of clusters to partition the data into
tol = 0.1  # a centre counts as settled once it moves less than this distance
n_samples = data.shape[0]  # taken from the data itself rather than assumed

# Seed the centres with k distinct samples picked at random.
point_ind = random.sample(range(n_samples), k)
center_point = data[point_ind, :]  # coordinates of the k cluster centres

while True:
    # Assignment step: label every sample with the index of its nearest
    # centre (Euclidean distance; ties resolve to the lowest index).
    distance = np.linalg.norm(
        data[:, None, :] - center_point[None, :, :], axis=2
    )
    # Stored as an (n_samples, 1) integer column. The np.int alias that was
    # used for the dtype here has been removed from NumPy, so no alias is
    # needed: argmin already yields an integer array.
    data_label = np.argmin(distance, axis=1).reshape(-1, 1)
    flat_label = data_label.ravel()

    # Update step: move each centre to the mean of the samples assigned to it.
    number_label = np.bincount(flat_label, minlength=k)  # samples per cluster
    new_center_point = np.array(center_point, dtype=float)  # independent copy
    for i in range(k):
        # A cluster with no members keeps its previous centre. Averaging
        # zero samples would produce NaN, which then corrupts every later
        # distance computation and can stop the loop from ever converging.
        if number_label[i] > 0:
            new_center_point[i, :] = data[flat_label == i].mean(axis=0)

    # Converged when every centre moved by less than tol. On exit,
    # center_point still holds the centres that data_label was computed from.
    shift = np.linalg.norm(new_center_point - center_point, axis=1)
    if np.all(shift < tol):
        break
    center_point = new_center_point

## Visualisation of the clustering result (adapts to the number of clusters k)

plt.figure(1)
labels = np.ravel(data_label)  # works for an (n, 1) column or a flat vector
# The first four colours match the palette used for clusters 0-3; the list is
# cycled so that labels beyond it are still drawn instead of being skipped.
colors = ['r', 'g', 'b', 'y', 'c', 'm']

# Only clusters that actually own samples are drawn (points and centre).
shown = [i for i in range(k) if np.any(labels == i)]
for i in shown:
    members = labels == i
    # One call per cluster instead of one call per sample.
    plt.plot(data[members, 0], data[members, 1], colors[i % len(colors)] + '*')

# Draw each centre once, after the samples, so the markers stay on top.
plt.plot(center_point[shown, 0], center_point[shown, 1], 'ko')

plt.axis()  # called without arguments, as before; sets no explicit limits
plt.title("scatter")
plt.xlabel("x")
plt.ylabel("y")
plt.show()

## Library alternative using scikit-learn (disabled: the code below is wrapped in a string literal)
"""
from  sklearn.cluster  import KMeans # 导入k-means

km = KMeans(n_clusters=k)
#  训练数据
km.fit(data)
 
# 进行预测
y_predict = km.predict(data)
 
# 获取聚类中心
center = km.cluster_centers_
"""

结果
在这里插入图片描述

Chordx

关注

3
点赞
踩
14

收藏

觉得还不错? 一键收藏
3
评论
K-means 算法（Python代码）

## 导入相应的库import randomimport numpy as npimport matplotlib.pyplot as plt## 获取数据（生成）nn = 100 # 每类（maybe）样本数目def get_clusters(): mean1 = [0,0] cov1 = [[0.1,0],[0,0.1]] data1 = np.random.multivariate_normal(mean1,cov1,nn) mean2 = [1
复制链接

扫一扫