数学公式见:
https://www.orzzz.net/directory/mathematics/MultivariateStatisticalAnalysis/CrispClusterValidityIndices/index.html
Python 源码:
def MinkowskiDistance(a: list, b: list, p: int = 2) -> float:
    """Return the Minkowski distance of order ``p`` between two points.

    Computes ``(sum(|a_i - b_i| ** p)) ** (1 / p)``. With the default
    ``p = 2`` this is the Euclidean distance; ``p = 1`` gives the
    Manhattan distance.

    Args:
        a: Coordinates of the first point.
        b: Coordinates of the second point; must have as many entries as ``a``.
        p: Order of the distance. ``p = 0`` raises ``ZeroDivisionError``
            because of the ``1 / p`` exponent.

    Returns:
        The distance between ``a`` and ``b`` as a float.

    Raises:
        ValueError: If ``a`` and ``b`` do not have the same length.
    """
    # zip() stops at the shorter input, so without this check a dimension
    # mismatch would silently produce a distance over a truncated point.
    if len(a) != len(b):
        raise ValueError(
            'points must have the same dimension, got %d and %d' % (len(a), len(b))
        )
    return sum(abs(i - j) ** p for i, j in zip(a, b)) ** (1 / p)
def Centroid(C: list[list]) -> list:
    """Return the coordinate-wise arithmetic mean of the points in ``C``.

    Args:
        C: A cluster given as a list of points, each point a list of numbers.

    Returns:
        A list with one mean per coordinate. An empty cluster yields ``[]``;
        if the points differ in length, only the coordinates common to all
        of them are averaged (``zip`` stops at the shortest point).
    """
    # zip(*C) transposes the cluster: each tuple holds one coordinate axis.
    return [sum(axis) / len(axis) for axis in zip(*C)]
def Cohesion(C: list[list]) -> float:
    """Return the mean distance from the points of ``C`` to its centroid.

    The distance is ``MinkowskiDistance`` called with its default order.

    Args:
        C: A cluster given as a list of points.

    Returns:
        The average point-to-centroid distance.

    Raises:
        ZeroDivisionError: If ``C`` is empty.
    """
    center = Centroid(C)
    total = 0
    for point in C:
        total += MinkowskiDistance(point, center)
    return total / len(C)
def DaviesBouldinIndex(C: list[list[list]], DLL_SO: str = None) -> float:
    """Return the Davies-Bouldin index of a partition.

    For every cluster ``i`` the largest ratio
    ``(S_i + S_j) / d(c_i, c_j)`` over all other clusters ``j`` is taken,
    where ``S`` is the cluster's ``Cohesion`` and ``d`` is the
    ``MinkowskiDistance`` between the two ``Centroid`` values. The index is
    the mean of those maxima.

    Args:
        C: The partition, given as a list of clusters; each cluster is a
            list of points.
        DLL_SO: Accepted for interface compatibility; not used by this
            implementation.

    Returns:
        The Davies-Bouldin index as a float.

    Raises:
        ValueError: If fewer than two clusters are given.
        ZeroDivisionError: If two clusters have the same centroid, or a
            cluster is empty.
    """
    if len(C) < 2:
        raise ValueError(
            'Davies-Bouldin index needs at least 2 clusters, got %d' % len(C)
        )
    # Centroid and scatter depend on a single cluster only, so compute them
    # once per cluster instead of once per ordered pair.
    profiles = [(Centroid(cluster), Cohesion(cluster)) for cluster in C]
    total = 0.0
    for i, (centroid_i, scatter_i) in enumerate(profiles):
        # Exclude the cluster itself by position, not by value: two distinct
        # clusters that happen to hold equal points are still a pair.
        total += max(
            (scatter_i + scatter_j) / MinkowskiDistance(centroid_i, centroid_j)
            for j, (centroid_j, scatter_j) in enumerate(profiles)
            if j != i
        )
    return total / len(profiles)
if __name__ == '__main__':
    # Demo partition: three clusters (3, 2 and 4 samples) of 4-dimensional points.
    C = [
        [
            [5.1, 3.5, 1.4, 0.2],
            [4.9, 3.0, 1.4, 0.2],
            [5.0, 3.3, 1.4, 0.2],
        ],
        [
            [4.9, 2.4, 3.3, 1.0],
            [6.6, 2.9, 4.6, 1.3],
        ],
        [
            [6.5, 3.0, 5.5, 1.8],
            [7.7, 3.8, 6.7, 2.2],
            [7.2, 3.2, 6.0, 1.8],
            [6.4, 2.8, 5.6, 2.1],
        ],
    ]
    print('Illusionna DBI:\t%.16f' % DaviesBouldinIndex(C))
    # --------------------------------------------------------------
    # Cross-check against scikit-learn, which expects a flat sample matrix
    # plus one integer label per sample rather than nested clusters.
    from sklearn.metrics import davies_bouldin_score
    matrix = [row for group in C for row in group]
    labels = [index for index, group in enumerate(C) for _ in group]
    print('sklearn DBI:\t%.16f' % davies_bouldin_score(matrix, labels))
与 sklearn 对比结果: