Python-sklearn-Kmeans(1)

"""
@Title: K-means
@Time: 2024/2/16
@Author: Michael Jie
"""

import numpy as np
from sklearn.cluster import KMeans

print("----------手动实现K-means----------")
# Toy data set: ten samples with two features each. The samples form two
# well-separated groups, one near the origin and one near (96, 97).
data_sets = [[1, 2],
             [4.5, 0.2],
             [95, 97],
             [95.5, 95],
             [2.9, 1],
             [96, 100],
             [4.2, 3.2],
             [98.7, 95],
             [2.1, 1.1],
             [95.4, 99]]
data_sets = np.array(data_sets)
# Number of samples
m = len(data_sets)
# Number of clusters
n = 2

# Initialise the two cluster centres from the first and last samples.
# Take copies: indexing a row returns a view into data_sets, so a centre
# that is later modified through the view would corrupt the data itself.
x1 = data_sets[0].copy()
x2 = data_sets[-1].copy()
print("初始的组中点为:({x1}, {x2})".format(x1=x1, x2=x2))

# Assignment step. The data set is simple enough that a single pass is
# sufficient, so no iteration is performed.
# Each sample joins the group of its NEAREST centre (group 1 <-> x1,
# group 2 <-> x2). Squared Euclidean distance is used so the assignment
# minimises the same quantity that the loss below measures.
# NOTE: the comparison used to be inverted, so sample output recorded with
# the old code lists the two groups/centres in the opposite order.
sets1 = []
sets2 = []
for data_set in data_sets:
    dev1 = np.sum(np.square(data_set - x1))
    dev2 = np.sum(np.square(data_set - x2))
    # Ties go to the first centre.
    if dev1 <= dev2:
        sets1.append(data_set)
    else:
        sets2.append(data_set)
print("组数据为:({set1}, {set2})".format(set1=sets1, set2=sets2))

# Update step: each centre becomes the per-feature mean of its group.
# New arrays are bound to x1/x2 instead of writing into the old ones.
sets1 = np.array(sets1)
sets2 = np.array(sets2)
x1 = sets1.mean(axis=0)
x2 = sets2.mean(axis=0)
print("分组后的中点为:({x1}, {x2})".format(x1=x1, x2=x2))

# Loss: sum of squared distances from every sample to its group's centre
# (the quantity scikit-learn reports as inertia_).
loss = np.sum(np.square(sets1 - x1)) + np.sum(np.square(sets2 - x2))
print("损失为:{loss}".format(loss=loss))

print("----------使用sklearn实现K-means----------")
# Same ten two-feature samples, passed to scikit-learn as a plain nested list.
data_sets = [
    [1, 2], [4.5, 0.2], [95, 97], [95.5, 95], [2.9, 1],
    [96, 100], [4.2, 3.2], [98.7, 95], [2.1, 1.1], [95.4, 99],
]
# Build a two-cluster estimator with a fixed seed and fit it in one
# expression; fit() returns the estimator itself.
k_means = KMeans(n_clusters=2, random_state=0).fit(data_sets)
# Report the fitted centres and the inertia (sum of squared distances of
# the samples to their closest centre).
print("分组后的中点为:{x}".format(x=k_means.cluster_centers_))
print("损失为:{loss}".format(loss=k_means.inertia_))

"""
----------手动实现K-means----------
初始的组中点为:([1. 2.], [95.4 99. ])
组数据为:([array([95., 97.]), array([95.5, 95. ]), array([ 96., 100.]), array([98.7, 95. ]), array([95.4, 99. ])], 
[array([1., 2.]), array([4.5, 0.2]), array([2.9, 1. ]), array([4.2, 3.2]), array([2.1, 1.1])])
分组后的中点为:([96.12 97.2 ], [2.94 1.5 ])
损失为:43.360000000000014
----------使用sklearn实现K-means----------
分组后的中点为:[[96.12 97.2 ]
[ 2.94  1.5 ]]
损失为:43.36000000000001
"""

  • 12
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值