K-Prototypes聚类
前一篇讲述了K-Prototypes聚类的原理及其伪代码,本篇在上一篇的基础上,用Python代码实现K-Prototypes算法。
# -*- coding: utf-8 -*-
import numpy as np
import random
from collections import Counter
def dist(x, y):
    """Return the Euclidean distance between two numeric vectors.

    Args:
        x: Array-like of numbers (list, tuple or NumPy array; object-dtype
            arrays holding numbers are accepted as well).
        y: Array-like of numbers with a shape compatible with ``x``.

    Returns:
        The square root of the sum of squared differences. The sum runs
        over the first axis, so 1-D inputs yield a single scalar.
    """
    # Casting to float lets plain sequences and object-dtype rows take the
    # vectorized native path instead of per-element Python arithmetic.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.sqrt(np.sum(diff ** 2, axis=0))
def sigma(x, y):
    """Return the number of positions at which ``x`` and ``y`` differ.

    Args:
        x: Array-like of values (typically categorical labels).
        y: Array-like of values with a shape compatible with ``x``.

    Returns:
        The count of element-wise mismatches along the first axis; for 1-D
        inputs this is a single integer.
    """
    # Plain lists/tuples compare as a whole with ``==``/``!=`` (yielding one
    # bool), so convert them to arrays first. dtype=object keeps each value's
    # own type instead of coercing mixed sequences to strings.
    left = x if isinstance(x, np.ndarray) else np.asarray(x, dtype=object)
    right = y if isinstance(y, np.ndarray) else np.asarray(y, dtype=object)
    return np.count_nonzero(left != right, axis=0)
def findprotos(data, k):
    """Split mixed-type data by column type and draw k random prototypes.

    Every column is classified by the type of its value in the first row:
    ints and floats (built-in or NumPy scalars) mark a numeric column,
    strings mark a categorical column.

    Args:
        data: 2-D NumPy array of shape (m, n).
        k: Number of prototype rows to sample without replacement.

    Returns:
        A tuple ``(O, C, O_data, C_data, O_protos, C_protos)`` where ``O`` and
        ``C`` are the lists of numeric and categorical column indices,
        ``O_data`` and ``C_data`` are the corresponding column slices of
        ``data``, and ``O_protos`` and ``C_protos`` are the k sampled rows of
        each slice.

    Raises:
        ValueError: If the first value of a column is neither a number nor a
            string, or if ``random.sample`` rejects ``k`` (e.g. ``k`` larger
            than the number of rows).
    """
    m, n = data.shape
    # The same row indices are applied to both slices below, so every
    # prototype is made of the numeric and categorical parts of one row.
    chosen_rows = random.sample(range(m), k)

    numeric_cols = []
    categorical_cols = []
    for col in range(n):
        first_value = data[0, col]
        # NumPy integer scalars (e.g. np.int64) do not subclass the built-in
        # int, so they must be listed explicitly to count as numbers.
        if isinstance(first_value, (int, float, np.integer, np.floating)):
            numeric_cols.append(col)
        elif isinstance(first_value, str):
            categorical_cols.append(col)
        else:
            raise ValueError("the %d column of data is not a number or a string column" % col)

    numeric_data = data[:, numeric_cols]
    categorical_data = data[:, categorical_cols]
    numeric_protos = numeric_data[chosen_rows, :]
    categorical_protos = categorical_data[chosen_rows, :]
    return (numeric_cols, categorical_cols, numeric_data, categorical_data,
            numeric_protos, categorical_protos)
def KPrototypes