记录一下常用的三种向量相似度计算代码,包括欧氏距离、余弦距离、KL散度。
class DistanceCalculate(object):
    """Compute distance / similarity measures between two equal-length vectors.

    The two vectors are stored unchanged on ``self.vector1`` and
    ``self.vector2``; each method converts them to NumPy arrays when called.

    Args:
        vector1: First vector (list, tuple or NumPy array of numbers).
        vector2: Second vector, same length as ``vector1``.

    Raises:
        ValueError: If the vectors differ in length or are empty.
    """

    # Added to numerator and denominator inside the logarithm of the
    # divergence so that zero entries never produce log(0) or a division by 0.
    _EPSILON = 1e-7

    def __init__(self, vector1, vector2):
        self.vector1 = vector1
        self.vector2 = vector2
        len1, len2 = len(vector1), len(vector2)
        # Raise instead of calling exit(): a bad argument must not terminate
        # the whole interpreter (and exit() would report status 0).
        if len1 != len2:
            raise ValueError(
                "Error: vector length is not equal ({} != {}).".format(len1, len2)
            )
        if len1 == 0:
            raise ValueError("Error: vector length is zero.")

    def _as_arrays(self):
        """Return the two stored vectors as NumPy arrays."""
        return np.asarray(self.vector1), np.asarray(self.vector2)

    def euclidean_distance(self):
        """Return the Euclidean (L2) distance between the two vectors."""
        a, b = self._as_arrays()
        return np.sqrt(np.sum(np.square(a - b)))

    def cosine_distance(self):
        """Return ``(inner_product, cosine)`` for the two vectors.

        Despite the method name, the second element is the cosine
        *similarity* (inner product divided by the product of the L2 norms),
        not ``1 - similarity``.

        Returns:
            A tuple ``(inner_product, cosine)``. ``cosine`` is NaN when either
            vector has zero norm, because the ratio is undefined there.
        """
        a, b = self._as_arrays()
        inner_product = np.dot(a, b)
        norm_product = np.sqrt(np.sum(np.square(a))) * np.sqrt(np.sum(np.square(b)))
        if norm_product == 0:
            # 0 / 0: report NaN explicitly instead of emitting a RuntimeWarning.
            return inner_product, np.float64("nan")
        return inner_product, inner_product / norm_product

    def kullback_leibler_divergence(self):
        """Return a KL-style divergence of vector1 from vector2 (base-2 log).

        Both vectors are min-max scaled into [0, 1] using the minimum and
        maximum taken over the entries of *both* vectors, then
        ``sum(p * log2((p + eps) / (q + eps)))`` is returned. The scaled
        vectors are not normalised to sum to 1, so this is not the divergence
        of two proper probability distributions.

        Returns:
            The divergence as a float; ``0.0`` when every entry of both
            vectors is the same value (the scaling range is zero and the
            vectors are identical).
        """
        a, b = self._as_arrays()
        # Take the joint range from the arrays themselves. Using
        # ``vector1 + vector2`` would concatenate lists but add NumPy arrays
        # element-wise, giving a wrong range for array inputs.
        lowest = min(np.min(a), np.min(b))
        highest = max(np.max(a), np.max(b))
        value_range = highest - lowest
        if value_range == 0:
            return 0.0
        scaled_a = (a - lowest) / value_range
        scaled_b = (b - lowest) / value_range
        eps = self._EPSILON
        return np.sum(scaled_a * np.log2((scaled_a + eps) / (scaled_b + eps)))