在 sklearn 中没有找到能够直接针对稀疏矩阵进行聚类的方法，于是自己先写了一个距离计算函数：
def cal_distance(coo1, coo2, *, squared=True):
    """Return the (squared) Euclidean distance between two sparse vectors.

    Both arguments are read only through their ``col`` (column indices) and
    ``data`` (stored values) attributes, i.e. the layout exposed by COO-format
    sparse matrices. Row indices are ignored, so each argument is assumed to
    be a single-row vector -- TODO confirm against callers.

    Args:
        coo1: First sparse vector (object with parallel ``col`` / ``data``).
        coo2: Second sparse vector (object with parallel ``col`` / ``data``).
        squared: When true (the default, matching the historical return
            value) the sum of squared differences is returned without taking
            the square root. Pass ``squared=False`` for the true Euclidean
            distance.

    Returns:
        The sum of squared per-column differences, or its square root when
        ``squared`` is false. Two vectors with no stored entries yield 0.
    """
    # Accumulate the per-column difference coo1 - coo2 in a single pass over
    # each vector. Entries sharing a column index are added together, which
    # is how COO duplicates are interpreted.
    diff_by_col = {}
    for col, value in zip(coo1.col, coo1.data):
        diff_by_col[col] = diff_by_col.get(col, 0) + value
    for col, value in zip(coo2.col, coo2.data):
        diff_by_col[col] = diff_by_col.get(col, 0) - value

    total = sum(diff * diff for diff in diff_by_col.values())
    return total if squared else total ** 0.5