kmeans
给定任意个 n 维数据点和特征点(聚类中心)的个数 k,按 n 维空间中的最小欧式距离把每个数据点划分给离它最近的特征点,并不断迭代更新这 k 个特征点,直到它们的移动量小于可接受的误差;最后输出计算得到的 k 个特征点的 n 维坐标。
import random
import math
# Helper class that carries out the k-means steps
class FindList():
    """Assignment and centroid-update steps of a list-based k-means.

    Points and centroids are sequences of numbers. A cluster is identified
    by the index of its centroid inside the centroid list.
    """

    @staticmethod
    def _distance(pointer1, pointer2):
        """Return the Euclidean distance between two points.

        Uses the same formula as ``MathService.distance``; it is kept local
        so no helper object is created for every point/centroid pair.
        """
        return math.sqrt(sum((x1 - x2) ** 2 for x1, x2 in zip(pointer1, pointer2)))

    def getCurrentResultList(self, currentList, currentRandom):
        """Assign every point to its nearest centroid.

        Args:
            currentList: the data points.
            currentRandom: the current centroids.

        Returns:
            A list of ``[cluster_index, point_index]`` pairs sorted in
            ascending order, so entries of one cluster are adjacent. On a
            distance tie the centroid with the lowest index wins.
        """
        resultList = []
        for pointIndex, point in enumerate(currentList):
            distances = [self._distance(point, centroid) for centroid in currentRandom]
            resultList.append([distances.index(min(distances)), pointIndex])
        resultList.sort()
        return resultList

    def getNewPoints(self, currentResultList, firstList, currentPoint, limitCondition):
        """Recompute every centroid as the mean of the points assigned to it.

        Args:
            currentResultList: ``[cluster_index, point_index]`` pairs as
                produced by ``getCurrentResultList``; cluster indexes are
                expected to index into ``currentPoint``.
            firstList: the data points the point indexes refer to.
            currentPoint: the centroids used for the current assignment.
            limitCondition: movement threshold handed to ``isEnd``.

        Returns:
            ``0`` when ``isEnd`` reports convergence (the centroids passed
            in are printed first), otherwise the list of new centroids.
            The list always has one entry per entry of ``currentPoint``, in
            the same order.
        """
        # Per-cluster coordinate sums and member counts, keyed by cluster index.
        sums = {}
        counts = {}
        for entry in currentResultList:
            clusterIndex = entry[0]
            point = firstList[entry[1]]
            if clusterIndex in sums:
                sums[clusterIndex] = [old + value for old, value in zip(sums[clusterIndex], point)]
                counts[clusterIndex] += 1
            else:
                sums[clusterIndex] = list(point)
                counts[clusterIndex] = 1

        reultList = []
        for clusterIndex, oldCentroid in enumerate(currentPoint):
            if clusterIndex in counts:
                memberCount = counts[clusterIndex]
                reultList.append([total / memberCount for total in sums[clusterIndex]])
            else:
                # A cluster that received no points keeps its previous
                # centroid; dropping it would shorten the list and shift
                # every later cluster index onto the wrong centroid.
                reultList.append(list(oldCentroid))

        if self.isEnd(reultList, currentPoint, limitCondition) == 0:
            print('最后的点是')
            print(currentPoint)
            return 0
        return reultList

    def isEnd(self, reultList, currentPoint, limitCondition):
        """Measure how far the centroids moved and compare with the limit.

        The movement is the square root of the summed squared distances
        between corresponding entries of ``reultList`` and ``currentPoint``.

        Returns:
            The movement (also printed) when it is greater than
            ``limitCondition``; ``0`` otherwise.
        """
        count = math.sqrt(sum(self._distance(x, y) ** 2 for x, y in zip(reultList, currentPoint)))
        if count > limitCondition:
            print('count:' + str(count))
            return count
        return 0
# Math helper class: random test data and distance computation
class MathService():
    """Random test-data generation and distance computation for k-means."""

    def getRandomNum(self, minNum, maxNum):
        """Return a random integer N with ``minNum <= N <= maxNum``."""
        # The lower bound is used as given; negating it would widen the
        # range for every minNum other than 0.
        return random.randint(minNum, maxNum)

    def creatADimensionList(self, dimensionCount, minNum, maxNum):
        """Return one random point: ``dimensionCount`` integers in range."""
        return [self.getRandomNum(minNum, maxNum) for _ in range(dimensionCount)]

    def creatFirstList(self, dimensionCount=2, listLength=100, minNum=0, maxNum=100):
        """Return ``listLength`` random points of ``dimensionCount`` dimensions."""
        return [self.creatADimensionList(dimensionCount, minNum, maxNum) for _ in range(listLength)]

    def getFirstRandom(self, listLength, randomCount, firstList):
        """Pick the initial centroids from the data.

        Args:
            listLength: number of leading entries of ``firstList`` to draw from.
            randomCount: how many centroids to pick.
            firstList: the data points.

        Returns:
            ``randomCount`` entries of ``firstList`` taken at pairwise
            distinct indexes (the entries themselves, not copies).

        Raises:
            ValueError: if ``randomCount`` is larger than ``listLength``.
        """
        # random.sample draws distinct indexes in one step and fails fast
        # when more indexes are requested than exist, instead of retrying
        # forever.
        chosenIndexes = random.sample(range(listLength), randomCount)
        return [firstList[i] for i in chosenIndexes]

    def distance(self, pointer1, pointer2):
        """Return the Euclidean distance between two points."""
        return math.sqrt(sum((x1 - x2) ** 2 for x1, x2 in zip(pointer1, pointer2)))
# Demo driver: build random data, pick k starting points, iterate k-means.
m1 = MathService()
dimensionCount = 2       # dimensions per data point
listLength = 100         # number of simulated data points
minNum = 0               # lower bound handed to the data generator
maxNum = 100             # upper bound handed to the data generator
randomCount = 5          # number of feature points (k)
limitCondition = 0.001   # movement at or below which iteration stops

# Simulated data set.
firstList = m1.creatFirstList(dimensionCount, listLength, minNum, maxNum)

# Initial k feature points, drawn from the data set itself.
firstRandom = m1.getFirstRandom(listLength, randomCount, firstList)
currentPoint = firstRandom

f1 = FindList()
while True:
    # Partition firstList (always the full data set) among the current points.
    currentResultList = f1.getCurrentResultList(firstList, currentPoint)
    # Move the k feature points; getNewPoints returns 0 once they have settled.
    currentPoint = f1.getNewPoints(currentResultList, firstList, currentPoint, limitCondition)
    if currentPoint != 0:
        continue
    print(currentResultList)
    break