Python 实现 k-means 算法
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import math
# Load the data set. NOTE: the path is absolute and machine-specific.
data = pd.read_csv("D:/download/machine-learning/iris_data.csv")
# Take an independent copy of the four feature columns: a 'lable' column is
# added to this frame later for plotting, and writing to a slice of `data`
# can raise SettingWithCopyWarning.
datax = data[['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width']].copy()
data_X = np.array(datax.values)
# One cluster label per sample; 0 until the first assignment step runs.
lable = np.zeros(len(data_X))

# Initialise the 3 cluster centres with one random sample from each third of
# the rows (rows 0-49, 50-99 and 100-149 for a 150-row file).
# NOTE(review): presumably the rows are grouped by species - confirm.
# random.randrange excludes its upper bound, unlike random.randint, so the
# drawn index can never equal len(data_X) and the thirds do not overlap.
# Raises ValueError if there are fewer than 3 rows.
n_samples = len(data_X)
bounds = [n_samples * j // 3 for j in range(4)]
kmeans = [
    data_X[random.randrange(low, high)]
    for low, high in zip(bounds[:-1], bounds[1:])
]

# Positions of the three centres inside `kmeans`.
k1 = 0
k2 = 1
k3 = 2
# Number of iterations
inter = 20
# Compute the sum of squared differences
def counter(i, k):
    """Return the squared Euclidean distance between sample i and centre k.

    Reads the module-level ``data_X`` (samples) and ``kmeans`` (centres).

    Args:
        i: Row index into ``data_X``.
        k: Index into ``kmeans``.

    Returns:
        The sum over all features of the squared difference between the
        sample and the centre.
    """
    # Pair the features up instead of hard-coding 4 of them, and use the
    # builtin sum() rather than shadowing it with a local accumulator.
    return sum(
        math.pow(value - centre, 2)
        for value, centre in zip(data_X[i], kmeans[k])
    )
# Run a fixed number of k-means iterations (no convergence check).
for step in range(inter):
    print(kmeans)
    # Assignment step: compute each sample's distance to the three centres
    # and label it 1, 2 or 3 after the nearest one. The strict comparisons
    # mean a tie goes to the higher-numbered cluster.
    for i in range(len(data_X)):
        i1 = math.sqrt(counter(i, k1))
        i2 = math.sqrt(counter(i, k2))
        i3 = math.sqrt(counter(i, k3))
        if i1 < i2:
            lable[i] = 1 if i1 < i3 else 3
        else:
            lable[i] = 2 if i2 < i3 else 3
    # Update step: move each centre to the mean of the samples assigned to it.
    for centre_index, cluster in enumerate((1, 2, 3)):
        members = data_X[lable == cluster]
        # An empty cluster keeps its previous centre. Dividing by a zero
        # member count would make the centre NaN, after which every
        # distance comparison is False and all samples fall into cluster 3.
        if len(members) > 0:
            kmeans[centre_index] = members.mean(axis=0)
# Convert the numeric labels to strings so they can be used as the class
# column when plotting.
# Debug output kept from the original: prints the type of a converted label.
print(type(str(lable[4])))
lable_str = [str(value) for value in lable]
# assign() returns a new frame that includes the 'lable' column, so nothing
# is written into a slice of the original data (which can raise
# SettingWithCopyWarning). `datax` is rebound so it still carries the column.
datax = datax.assign(lable=lable_str)
# Draw one Andrews curve per sample, grouped by cluster label.
pd.plotting.andrews_curves(datax, 'lable')
plt.show()
实验课写的，可能有错误，欢迎指正。