sklearn-KMeans-花卉识别

Iris Species

KMeans核心算法

import numpy as np
import pandas as pd
import sklearn as sl

class Kmeans:
    """Minimal k-means clustering (Lloyd's algorithm) on a NumPy array.

    ``data`` is indexed as ``data[row, :]`` with ``data.shape[0]`` examples
    and ``data.shape[1]`` features; ``k`` is the number of clusters.
    """

    def __init__(self, data, k):
        self.data = data
        self.k = k

    def train(self, max_iterations):
        """Run k-means for at most ``max_iterations`` centroid updates.

        Iteration stops early once the assignments no longer change, because
        further updates would reproduce the same centroids.

        Returns:
            A tuple ``(centroids, closest_centroids_ids)`` where ``centroids``
            has shape ``(k, num_features)`` and ``closest_centroids_ids`` is a
            float array of shape ``(num_examples, 1)`` holding, for each
            example, the index of its nearest returned centroid.

        Raises:
            ValueError: if ``k`` is not between 1 and the number of examples.
        """
        centroids = Kmeans.centroids_init(self.data, self.k)
        # Assign once up front so that the result is well defined even when
        # max_iterations is 0 (no uninitialised buffer is ever returned).
        closest_centroids_ids = Kmeans.centroids_find_closest(self.data, centroids)
        for _ in range(max_iterations):
            # Move every centroid to the mean of its current members.
            centroids = Kmeans.centroids_compute(
                self.data, closest_centroids_ids, self.k, centroids
            )
            # Re-assign so the returned ids always match the returned centroids.
            new_ids = Kmeans.centroids_find_closest(self.data, centroids)
            converged = np.array_equal(new_ids, closest_centroids_ids)
            closest_centroids_ids = new_ids
            if converged:
                break
        return centroids, closest_centroids_ids

    @staticmethod
    def centroids_init(data, k):
        """Pick ``k`` rows of ``data`` at random as the initial centroids.

        Rows are drawn from the distinct rows of ``data`` whenever there are
        at least ``k`` of them, so two centroids never start at the same
        location (which would leave one of them with an empty cluster).

        Raises:
            ValueError: if ``k`` is smaller than 1 or larger than the number
                of examples.
        """
        num_examples = data.shape[0]
        if k < 1 or k > num_examples:
            raise ValueError(
                'k must be between 1 and the number of examples '
                '({}), got {}'.format(num_examples, k)
            )
        candidates = np.unique(data, axis=0)
        if candidates.shape[0] < k:
            # Not enough distinct rows: fall back to sampling all rows.
            candidates = data
        # permutation returns a shuffled index array and leaves the input
        # untouched (np.random.shuffle would shuffle in place instead).
        random_ids = np.random.permutation(candidates.shape[0])
        return candidates[random_ids[:k], :]

    @staticmethod
    def centroids_find_closest(data, centroids):
        """Return the index of the nearest centroid for every example.

        Distances are squared Euclidean distances. The result is a float
        array of shape ``(num_examples, 1)``; ties go to the lowest index.
        """
        # Broadcast to (num_examples, num_centroids, num_features) so that all
        # pairwise differences are computed without Python-level loops.
        diffs = data[:, np.newaxis, :] - centroids[np.newaxis, :, :]
        squared_distances = np.sum(diffs ** 2, axis=2)
        nearest = np.argmin(squared_distances, axis=1)
        return nearest.astype(float).reshape(-1, 1)

    @staticmethod
    def centroids_compute(data, closest_centroids_ids, k, previous_centroids=None):
        """Compute each centroid as the mean of the examples assigned to it.

        Args:
            data: examples, one per row.
            closest_centroids_ids: per-example centroid index, any shape that
                flattens to ``num_examples`` entries.
            k: number of centroids.
            previous_centroids: optional ``(k, num_features)`` array. A
                cluster with no members keeps its row from this array; when
                it is omitted, such a cluster's centroid is NaN.

        Returns:
            Array of shape ``(k, num_features)``.
        """
        num_features = data.shape[1]
        centroids = np.zeros((k, num_features))
        labels = np.asarray(closest_centroids_ids).flatten()
        for centroid_id in range(k):
            members = data[labels == centroid_id, :]
            if members.shape[0] > 0:
                centroids[centroid_id] = np.mean(members, axis=0)
            elif previous_centroids is not None:
                # Empty cluster: stay put instead of becoming NaN, which
                # np.argmin would then treat as the nearest centroid for
                # every example.
                centroids[centroid_id] = previous_centroids[centroid_id]
            else:
                centroids[centroid_id] = np.nan
        return centroids

花卉识别

原始数据

数据下载地址:
https://www.kaggle.com/uciml/iris

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# The clustering class is spelled ``Kmeans``; importing ``KMeans`` from the
# module raises ImportError before anything is plotted.
from KMeans import Kmeans

# Load the Iris data set (expects the Kaggle CSV under ./archive).
data = pd.read_csv('./archive/Iris.csv')

iris_types = ['Iris-setosa','Iris-versicolor','Iris-virginica']
x_axis = 'PetalLengthCm'
y_axis = 'PetalWidthCm'

plt.figure(figsize=(12,5))

# Left panel of a 1x2 grid: scatter plot coloured by the known species.
plt.subplot(1,2,1)
for iris_type in iris_types:
    # Build the row mask once and reuse it for both coordinates.
    is_type = data['Species'] == iris_type
    plt.scatter(data.loc[is_type, x_axis],
                data.loc[is_type, y_axis],
                label=iris_type)
plt.title('known')
plt.legend()

# Right panel: the same points without any class information.
plt.subplot(1,2,2)
plt.scatter(data[x_axis], data[y_axis])
plt.title('unknown')
plt.show()

(图:左图为按已知类别着色的花瓣长度/宽度散点图,右图为未区分类别的同一组散点)

KMeans 算法的应用

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from KMeans import Kmeans

# Load the Iris data set (expects the Kaggle CSV under ./archive).
data = pd.read_csv('./archive/Iris.csv')

iris_types = ['Iris-setosa','Iris-versicolor','Iris-virginica']
x_axis = 'PetalLengthCm'
y_axis = 'PetalWidthCm'

# Selecting two columns already yields an array with one row per example and
# two feature columns, so no reshape is needed.
x_train = data[[x_axis, y_axis]].values

# Training parameters.
k = 3
max_iterations = 1000

k_means = Kmeans(x_train, k)

centroids, closest_centroids_ids = k_means.train(max_iterations)
print('centroids,closest_centroids_ids',centroids,closest_centroids_ids)

# Compare the known labels with the clustering result.
plt.figure(figsize=(12,5))

# Left panel of a 1x2 grid: scatter plot coloured by the known species.
plt.subplot(1,2,1)
for iris_type in iris_types:
    is_type = data['Species'] == iris_type
    plt.scatter(data.loc[is_type, x_axis],
                data.loc[is_type, y_axis],
                label=iris_type)
plt.title('known')
plt.legend()

# Right panel: points coloured by the cluster k-means assigned them to.
plt.subplot(1,2,2)
cluster_ids = np.asarray(closest_centroids_ids).flatten()
for centroid_id in range(len(centroids)):
    in_cluster = cluster_ids == centroid_id
    # Cluster indices are arbitrary and do not correspond to species, so the
    # legend uses neutral names; this also works for any value of k.
    plt.scatter(x_train[in_cluster, 0],
                x_train[in_cluster, 1],
                label='cluster {}'.format(centroid_id))

# Mark the final centroids in black on top of the clusters.
plt.scatter(centroids[:, 0], centroids[:, 1], color='black', marker='o')

plt.legend()
plt.title('KMeans')
plt.show()

(图:左图为按已知类别着色的散点图,右图为 KMeans 聚类结果,黑色圆点为各簇质心)

  • 0
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值