机器学习之k-means

用 k-means 对鸢尾花(Iris)数据集进行聚类的简单实现,经过验证效果不错。数据集见《机器学习之k-nn》一文。

import numpy as np

import csv
import random
def loadData(filename):
    """Load an Iris-style CSV file into a list of numeric rows.

    Each non-empty line is expected to hold four numeric fields followed by
    a species name.  The species name is mapped to an integer id
    (``Iris-setosa`` -> 0, ``Iris-versicolor`` -> 1, ``Iris-virginica`` -> 2);
    a name that is not in that table is stored as ``None``.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of ``[f0, f1, f2, f3, label_id]`` lists, one per data row.

    Raises:
        ValueError: If one of the first four fields is not a number.
        IndexError: If a non-empty row has fewer than five fields.
    """
    key_value = {'Iris-setosa':0,'Iris-versicolor':1,'Iris-virginica':2}
    all_data_label = []
    # Text mode: csv.reader needs str lines on Python 3 ('rb' yields bytes
    # there and fails), and plain 'r' still works on Python 2.
    with open(filename,'r') as csvfile:
        for row in csv.reader(csvfile):
            # Blank lines (e.g. trailing newlines at the end of the file)
            # come back as empty lists; skip them instead of crashing.
            if not row:
                continue
            record = [float(value) for value in row[:4]]
            record.append(key_value.get(row[4]))
            all_data_label.append(record)
    return all_data_label
def cal_distances(train_vec,test_vec):
    """Return the Euclidean distance between two equal-length vectors.

    Args:
        train_vec: Sequence or 1-D array of numbers.
        test_vec: Sequence or 1-D array of numbers, same length as
            ``train_vec``.

    Returns:
        ``sqrt(sum((train_vec - test_vec) ** 2))`` as a NumPy float.
    """
    diff = np.asarray(train_vec, dtype=float) - np.asarray(test_vec, dtype=float)
    # Square each component *before* summing; squaring the sum instead lets
    # positive and negative differences cancel and is not a distance.
    return np.sqrt(np.sum(diff ** 2))
def randCenter(dataSet,k):
    """Draw ``k`` random starting centroids inside the data's bounding box.

    For every column of ``dataSet`` the centroid coordinate is sampled
    uniformly between that column's minimum and maximum value.

    Args:
        dataSet: 2-D NumPy array, one sample per row.
        k: Number of centroids to generate.

    Returns:
        A ``(k, n_columns)`` float array of centroids.
    """
    n_features = dataSet.shape[1]
    centers = np.zeros((k, n_features))
    for col in range(n_features):
        column = dataSet[:, col]
        low = min(column)
        span = max(column) - low
        # One draw of k uniform numbers per column, scaled into [low, low+span).
        centers[:, col] = low + span * np.random.random((k,))
    return centers
def k_mearns(dataSet,k):
    """Cluster the rows of ``dataSet`` into ``k`` groups with k-means.

    Starting from random centroids (``randCenter``), the function alternates
    between assigning every row to its nearest centroid (as measured by
    ``cal_distances``) and moving each centroid to the mean of its assigned
    rows, until a full pass changes no assignment.

    Args:
        dataSet: 2-D NumPy array, one sample per row.
        k: Number of clusters.

    Returns:
        A tuple ``(centroids, clusterAssment)`` where ``centroids`` is a
        ``(k, n_columns)`` array and ``clusterAssment`` is a ``(rows, 2)``
        array holding, per sample, the distance to its centroid (column 0)
        and the centroid index (column 1).
    """
    rows = dataSet.shape[0]
    clusterAssment = np.zeros((rows,2))
    # Start from an impossible index so that a first-pass assignment to
    # cluster 0 is still recognised as a change.
    clusterAssment[:,1] = -1
    centroids = randCenter(dataSet,k)
    cluster_change = True
    while cluster_change:
        cluster_change = False
        # Assignment step: attach every sample to its nearest centroid.
        for i in range(rows):
            # np.inf instead of a fixed cap, so arbitrarily large distances
            # still pick a centroid.
            minDist = np.inf
            minIndex = -1
            for j in range(k):
                distances = cal_distances(dataSet[i,:],centroids[j,:])
                if distances < minDist:
                    minDist = distances
                    minIndex = j
            if clusterAssment[i,1] != minIndex:
                cluster_change = True
            clusterAssment[i,:] = minDist,minIndex
        # Update step: move each centroid to the mean of its members.
        for cent in range(k):
            # Index with the boolean mask itself; wrapping it in a list is
            # rejected as an index by current NumPy versions.
            members = dataSet[clusterAssment[:,1]==cent]
            # An empty cluster has no mean (it would become NaN); keep the
            # previous centroid in that case.
            if len(members) > 0:
                centroids[cent,:] = np.mean(members,axis=0)
    return centroids,clusterAssment
if __name__ == "__main__":
    # Every loaded row is four measurements followed by a class id.
    all_data_label = np.asarray(loadData('./Iris.data'))
    # Cluster on the four feature columns only; the class id is left out.
    data = all_data_label[:,:4]
    center,cluster=k_mearns(data,3)
    # Parenthesised single-argument print behaves the same on Python 2
    # (statement) and Python 3 (function call).
    print(cluster)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值