在做分类时,常常需要估算不同样本之间的相似性度量,通常采用的方法就是计算样本间的距离。采用什么样的方法计算距离,关系到分类的正确与否。
距离的 Python 实现
一、欧氏距离
# -*- coding: utf-8 -*-
import numpy as np
def eucliDist(A, B):
    """Return the Euclidean (L2) distance between ``A`` and ``B``.

    Args:
        A: Array-like of numbers (list, tuple or ``numpy.ndarray``).
        B: Array-like of numbers, broadcastable against ``A``.

    Returns:
        ``sqrt(sum((A - B) ** 2))`` with the sum taken along the first axis:
        a NumPy scalar for 1-D inputs, an array for higher-dimensional ones.
    """
    # Coerce first so plain sequences work; ``list - list`` raises TypeError.
    diff = np.asarray(A) - np.asarray(B)
    # np.sum(axis=0) reduces the first axis exactly like iterating with the
    # builtin sum() did, but it is vectorised and keeps its meaning even when
    # a ``from numpy import *`` elsewhere in the file rebinds the name ``sum``.
    return np.sqrt(np.sum(np.power(diff, 2), axis=0))
# Demo: two 4-D points that differ by exactly 1 in every coordinate.
X, Y = np.array([1, 2, 3, 4]), np.array([0, 1, 2, 3])
print('Euclidean distance:', eucliDist(X, Y))
二、曼哈顿距离
# -*- coding: utf-8 -*-
import numpy as np
def manhaDist(A, B):
    """Return the Manhattan (L1, city-block) distance between ``A`` and ``B``.

    Args:
        A: Operand supporting ``A - B`` (e.g. a NumPy array).
        B: Operand compatible with ``A`` under subtraction.

    Returns:
        The sum of the absolute element-wise differences over all elements.
    """
    abs_diff = np.absolute(A - B)
    return np.sum(abs_diff)
# Demo: every coordinate differs by 3, so the L1 distance is 9.
X, Y = np.array([1, 2, 3]), np.array([4, 5, 6])
print('Manhattan distance:', manhaDist(X, Y))
三、切比雪夫距离
# -*- coding: utf-8 -*-
import numpy as np
def chebyDist(A, B):
    """Return the Chebyshev (L-infinity) distance between ``A`` and ``B``.

    Args:
        A: Operand supporting ``A - B`` (e.g. a NumPy array).
        B: Operand compatible with ``A`` under subtraction.

    Returns:
        The largest absolute element-wise difference.
    """
    gaps = np.absolute(A - B)
    return np.max(gaps)
# Demo: the per-coordinate gaps are 3, 5 and 2, so the result is 5.
X, Y = np.array([1, 2, 3]), np.array([4, 7, 5])
print('Chebyshev distance:', chebyDist(X, Y))
四、闵可夫斯基距离
# -*- coding: utf-8 -*-
from math import *
from decimal import Decimal
def nth_root(value, n_root):
    """Return the ``n_root``-th root of ``value`` rounded to 3 decimal places.

    Args:
        value: Number to take the root of; it is converted with ``Decimal``.
        n_root: Degree of the root. Zero raises ``ZeroDivisionError`` because
            the exponent is computed as ``1 / float(n_root)``.

    Returns:
        decimal.Decimal: ``value ** (1 / n_root)`` rounded to three places.
    """
    # The exponent is formed in binary floating point and only then wrapped
    # in Decimal, exactly as the power is evaluated in Decimal arithmetic.
    exponent = Decimal(1 / float(n_root))
    root = Decimal(value) ** exponent
    return round(root, 3)
def minkowski_distance(x, y, p_value):
    """Return the Minkowski distance of order ``p_value`` between ``x`` and ``y``.

    Args:
        x: Iterable of numbers.
        y: Iterable of numbers. Items are paired with ``x`` by ``zip``, so
            surplus trailing items of the longer input are ignored.
        p_value: Order of the distance (1 is Manhattan, 2 is Euclidean).
            Must be non-zero because ``nth_root`` divides by it.

    Returns:
        The value produced by ``nth_root``: a ``Decimal`` rounded to three
        decimal places.
    """
    # Use the ** operator instead of pow(): this file's ``from math import *``
    # rebinds pow to math.pow, which converts every operand to float and so
    # loses exactness for large integers (and can raise OverflowError).
    total = sum(abs(a - b) ** p_value for a, b in zip(x, y))
    return nth_root(total, p_value)
# Demo: order-3 Minkowski distance between two 4-D points.
print(minkowski_distance([0, 3, 4, 5], [7, 6, 3, -1], 3))
五、标准化欧氏距离
# -*- coding: utf-8 -*-
import numpy as np
def standEuDist(A, B):
    """Return the standardized Euclidean distance between ``A`` and ``B``.

    The per-coordinate variance is estimated (``ddof=1``) from the two
    points themselves, stacked as rows. With only two samples that variance
    is ``(a - b) ** 2 / 2``, so every differing coordinate contributes
    exactly 2 to the squared distance.

    Args:
        A: 1-D array-like of numbers.
        B: 1-D array-like of numbers with the same length as ``A``.

    Returns:
        ``sqrt(sum((A - B) ** 2 / variance))`` as a NumPy float, where
        coordinates with zero variance are left out of the sum.
    """
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    sk = np.var(np.vstack([A, B]), axis=0, ddof=1)
    sq_diff = (A - B) ** 2
    # A coordinate that is identical in both points has zero variance;
    # dividing there gives 0/0 = nan and poisons the whole sum. Such a
    # coordinate adds nothing to the distance, so skip it.
    usable = sk > 0
    return np.sqrt((sq_diff[..., usable] / sk[usable]).sum())
# Demo: two 4-D points whose coordinates all differ.
X, Y = np.array([1, 2, 3, 4]), np.array([0, 1, 2, 3])
print('Standardized Euclidean distance:', standEuDist(X, Y))
六、余弦距离
# -*- coding: utf-8 -*-
import numpy as np
from scipy.spatial.distance import pdist
def cosDist(A, B):
    """Return the cosine similarity of ``A`` and ``B`` formatted as a string.

    ``pdist(..., 'cosine')`` yields the cosine distance; this function
    returns one minus that value, which is the cosine similarity.

    Args:
        A: 1-D sequence of numbers.
        B: 1-D sequence of numbers with the same length as ``A``.

    Returns:
        str: ``str()`` of the one-element NumPy array holding the value.

    NOTE(review): despite the name, the result is a similarity rather than a
    distance, and it is a string rather than a number; confirm callers
    expect this.
    """
    stacked = np.vstack([A, B])
    similarity = 1 - pdist(stacked, metric='cosine')
    return str(similarity)
# Demo vectors. cosDist returns 1 - cosine distance, i.e. the cosine
# similarity, so the label names the quantity that is actually printed.
X = [1, 2, 3, 4]
Y = [5, 6, 7, 8]
print("Cosine similarity:", cosDist(X, Y))
七、汉明距离
# -*- coding: utf-8 -*-
from numpy import *
def hamDist(matV):
    """Count the positions at which the first two rows of ``matV`` differ.

    Args:
        matV: Indexable whose items 0 and 1 can be subtracted element-wise,
            e.g. a two-row NumPy matrix or 2-D array.

    Returns:
        int: Number of non-zero entries in ``matV[0] - matV[1]``.
    """
    row_diff = matV[0] - matV[1]
    # nonzero() returns one index array per dimension, all of equal length;
    # the length of the first is the number of differing positions.
    nz_indices = nonzero(row_diff)
    return len(nz_indices[0])
# The ``mat`` alias was removed in NumPy 2.0; ``asmatrix`` builds the same
# matrix object and exists in older NumPy releases as well.
matV = asmatrix([[1, 0, 0, 1, 0, 1, 0, 0, 1],
                 [0, 1, 1, 0, 0, 0, 1, 1, 1]])
print('Hamming distance:', hamDist(matV))
八、杰卡德距离
# -*- coding: utf-8 -*-
import scipy.spatial.distance as dist
import numpy as np
def jacDist(A, B):
    """Return the Jaccard distance between ``A`` and ``B``.

    Args:
        A: 1-D sequence of 0/1 (or boolean) values.
        B: 1-D sequence with the same length as ``A``.

    Returns:
        numpy.ndarray: One-element array, as returned by ``pdist`` for a
        two-row input.
    """
    pair = np.asarray([A, B])
    return dist.pdist(pair, metric='jaccard')
# Demo: two binary vectors of length 9.
X, Y = (
    np.array([1, 1, 0, 1, 0, 1, 0, 0, 1]),
    np.array([0, 1, 1, 0, 0, 0, 1, 1, 1]),
)
print('Jaccard distance:', jacDist(X, Y))
九、马氏距离
# -*- coding: utf-8 -*-
from numpy import *
import numpy as np
def mahaDist(x):
    """Return the Mahalanobis distance between the first two rows of ``x``.

    The covariance matrix is estimated from all rows of ``x`` (one
    observation per row, one variable per column) and inverted with
    ``np.linalg.inv``.

    Args:
        x: 2-D ``numpy.ndarray`` with at least two rows.

    Returns:
        ``sqrt(d @ inv(cov) @ d)`` where ``d = x[0] - x[1]``.

    Side effects:
        Prints the transposed input and the inverse covariance matrix.
    """
    transposed = x.T
    print(transposed)
    inv_cov = np.linalg.inv(np.cov(transposed))
    print(inv_cov)
    delta = x[0] - x[1]
    quad_form = np.dot(np.dot(delta, inv_cov), delta.T)
    return np.sqrt(quad_form)
# Demo: four 2-D observations; the distance is taken between the first two.
x = np.array([[3, 4], [5, 6], [2, 2], [8, 4]])
# Label spelling corrected from "Mahalanobies" to "Mahalanobis".
print('Mahalanobis distance:', mahaDist(x))