cluster:k-均值(k-means)聚类算法


#----------------------------------------------工具类代码-------------------------
import  os
import  numpy as np
import  string
import matplotlib.pyplot as pl

'''
    -author-: kenny adelaide
    time: 2018/3/28
    company: 西华师范大学2期理科楼B313实验室
    description: cluster 算法实现
    classify : cluster-ulti 工具包
'''

# 链式编写法则
class cluster:
    """K-means clustering helper for 2-D samples with a chainable API.

    Typical use::

        cluster().initParam(params).read_datafromfile.culculate

    ``read_datafromfile`` and ``culculate`` are properties (kept that way so
    existing call chains continue to work); they do their work when the
    attribute is accessed.

    Instance attributes created by the methods:
        params / path / flag / CiNumber -- values taken from ``initParam``.
        u         -- dict: 0-based cluster index -> mean vector ``[x, y]``.
        assemblys -- dict: cluster index -> samples currently assigned to it.
        lines     -- samples read from the data file (lists of string tokens).
    """

    # Upper bound on refinement passes in case the means never settle.
    MAX_ITERATIONS = 1000

    def __init__(self):
        pass

    def initParam(self, params):
        """Store the algorithm parameters and reset the clustering state.

        Args:
            params: mapping with the keys ``'path'`` (data file),
                ``'flag'`` (mode handed to ``open``), ``'CiNumber'``
                (converted with ``int``) and the initial mean vectors
                ``'u1'``, ``'u2'``, ...  Every consecutively numbered
                ``'uN'`` key that is present becomes one cluster.

        Returns:
            ``self`` for chaining, or ``None`` (after printing a hint) when
            ``params`` is empty.

        Raises:
            KeyError: if a required key, or ``'u1'``, is missing.
        """
        if not params:
            print('请初始化cluster 算法的相关参数....')
            return None

        self.params = params
        self.path = params['path']
        self.flag = params['flag']
        self.CiNumber = int(params['CiNumber'])

        # Copy each initial vector: the means are updated in place later and
        # must not modify the caller's lists.
        self.u = {}
        position = 1
        while ('u%d' % position) in params:
            self.u[position - 1] = list(params['u%d' % position])
            position += 1
        if not self.u:
            raise KeyError('u1')

        self.assemblys = {index: [] for index in self.u}
        return self

    @property
    def read_datafromfile(self):
        """Load the samples from ``self.path`` into ``self.lines``.

        Each non-blank line is split on runs of whitespace; the tokens are
        kept as read (no numeric conversion). The file is closed before
        returning.

        Returns:
            ``self`` for chaining.
        """
        with open(self.path, self.flag) as source:
            rows = [raw.split() for raw in source]
        # Blank lines yield empty rows; they would break the distance step.
        self.lines = [row for row in rows if row]
        return self

    def distanceU(self, line, u):
        """Compute the Euclidean distance from one sample to every mean.

        Only the first two coordinates of the sample and of each mean vector
        are used.

        Args:
            line: sample whose first two items are convertible with ``float``.
            u: mean vectors, either a dict (index -> vector) or a sequence.

        Returns:
            dict mapping each cluster index to its distance as a ``float``.
        """
        x, y = float(line[0]), float(line[1])
        indexed = u.items() if hasattr(u, 'items') else enumerate(u)
        distances = {}
        for index, centroid in indexed:
            dx = float(centroid[0]) - x
            dy = float(centroid[1]) - y
            distances[index] = float(np.sqrt(dx * dx + dy * dy))
        return distances

    def updateU(self):
        """Replace each mean vector by the mean of its assigned samples.

        The first two coordinates of ``self.u[i]`` are overwritten in place.
        A cluster with no assigned samples keeps its current mean vector.

        Returns:
            ``self`` for chaining.
        """
        for index, centroid in self.u.items():
            members = self.assemblys.get(index)
            if not members:
                # No samples, so there is nothing to average.
                continue
            count = len(members)
            centroid[0] = sum(float(sample[0]) for sample in members) / count
            centroid[1] = sum(float(sample[1]) for sample in members) / count
        return self

    @property
    def culculate(self):
        """Run k-means on ``self.lines`` until the mean vectors stop changing.

        Each pass assigns every sample to its nearest mean (ties go to the
        lowest cluster index), recomputes the means and prints the new means
        and the members of each cluster. The loop ends once a pass leaves the
        means unchanged, or after ``MAX_ITERATIONS`` passes. The final
        assignment stays available in ``self.assemblys``.

        Returns:
            ``self`` for chaining.
        """
        for _ in range(self.MAX_ITERATIONS):
            self.assemblys = {index: [] for index in self.u}
            for sample in self.lines:
                distances = self.distanceU(sample, self.u)
                nearest = min(distances, key=distances.get)
                self.assemblys[nearest].append(sample)

            # Snapshot by value: updateU mutates the vectors in place, so
            # comparing against the same objects would always report "equal".
            previous = {index: list(vector) for index, vector in self.u.items()}
            self.updateU()

            print('新的均值向量:')
            print(self.u)
            for index in self.u:
                print('%d 类:' % index)
                print(self.assemblys[index])
            print('\n')

            if previous == self.u:
                break
        return self
#------------------------------------------------调用代码
import os
import  numpy as np
import string
import ulti
import matplotlib.pyplot as plt
import math

# Data-source settings plus three hand-picked samples used as the initial
# mean vectors.
params = dict(
    path='data.txt',
    flag='r',
    CiNumber=3,
    u1=[0.203, 0.337],
    u2=[0.143, 0.199],
    u3=[0.378, 0.473],
)

# Configure, load the samples, then run the clustering. The last two steps
# are properties, so accessing them is what triggers the work.
(
    ulti.cluster()
    .initParam(params)
    .read_datafromfile
    .culculate
)



#-----------------------------------------------------
第 n-2 次和第 n-1 次迭代的结果:
新的均值向量:
{0: [0.437, 0.323], 1: [0.692, 0.278], 2: [0.8, 0.598]}
0 类:
[['0.245', '0.057'], ['0.343', '0.099'], ['0.360', '0.370'], ['0.359', '0.188'], ['0.339', '0.241'], ['0.282', '0.257'], ['0.483', '0.312'], ['0.478', '0.437'], ['0.525', '0.369'], ['0.532', '0.472'], ['0.473', '0.376'], ['0.446', '0.459'], ['0.403', '0.237'], ['0.481', '0.149'], ['0.437', '0.211'], ['0.243', '0.267']]
1 类:
[['0.639', '0.161'], ['0.657', '0.198'], ['0.593', '0.042'], ['0.719', '0.103'], ['0.748', '0.232'], ['0.714', '0.346'], ['0.774', '0.376'], ['0.634', '0.264'], ['0.608', '0.318'], ['0.556', '0.215'], ['0.666', '0.091']]
2 类:
[['0.751', '0.489'], ['0.752', '0.445'], ['0.697', '0.460']]




新的均值向量:
{0: [0.437, 0.323], 1: [0.692, 0.278], 2: [0.8, 0.598]}
0 类:
[['0.245', '0.057'], ['0.343', '0.099'], ['0.360', '0.370'], ['0.359', '0.188'], ['0.339', '0.241'], ['0.282', '0.257'], ['0.483', '0.312'], ['0.478', '0.437'], ['0.525', '0.369'], ['0.532', '0.472'], ['0.473', '0.376'], ['0.446', '0.459'], ['0.403', '0.237'], ['0.481', '0.149'], ['0.437', '0.211'], ['0.243', '0.267']]
1 类:
[['0.639', '0.161'], ['0.657', '0.198'], ['0.593', '0.042'], ['0.719', '0.103'], ['0.748', '0.232'], ['0.714', '0.346'], ['0.774', '0.376'], ['0.634', '0.264'], ['0.608', '0.318'], ['0.556', '0.215'], ['0.666', '0.091']]
2 类:
[['0.751', '0.489'], ['0.752', '0.445'], ['0.697', '0.460']]
 
明显可以看到:相邻两次迭代得到的均值向量完全相同,即均值向量已不再更新,算法已经收敛。

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值