在 KNN 中，训练样本有 train_count 个，测试样本有 test_count 个，每个样本有 attr_count 个属性。现在需要快速计算 test_count 个测试样本与 train_count 个训练样本两两之间的欧氏距离，结果表示为一个 test_count 行、train_count 列的矩阵。
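这个问题可以转化为矩阵乘法：由 $\|a-b\|^2 = \|a\|^2 + \|b\|^2 - 2\,a \cdot b$ 可知，所有测试样本与训练样本之间的交叉项 $a \cdot b$ 可以通过一次矩阵乘法（测试矩阵乘以训练矩阵的转置）同时得到，而矩阵乘法可以使用 GPU 进行加速。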
import numpy as np
# Demo problem size: number of training samples, number of test samples,
# and number of attributes (features) per sample.
train_count, test_count, attr_count = 3, 2, 4

# Random integer samples drawn from [0, 10), one sample per row.
# `train` is drawn before `test`; keep this order so the two arrays consume
# the global NumPy random stream in the same sequence.
train = np.random.randint(low=0, high=10, size=(train_count, attr_count))
test = np.random.randint(low=0, high=10, size=(test_count, attr_count))
def one():
    """Compute the test-to-train distance matrix one pair at a time.

    Reads the module-level ``test`` and ``train`` arrays and fills a
    ``(test_count, train_count)`` float32 matrix whose entry ``[i, j]`` is
    the Euclidean norm of ``test[i] - train[j]``.

    Returns:
        The float32 distance matrix.
    """
    distances = np.empty((test_count, train_count), dtype=np.float32)
    # ndindex walks the output in row-major order, i.e. the same order as
    # two nested index loops over rows and then columns.
    for row, col in np.ndindex(*distances.shape):
        difference = test[row] - train[col]
        distances[row, col] = np.linalg.norm(difference)
    return distances
def two():
    """Compute the test-to-train distance matrix one test sample at a time.

    For each row of the module-level ``test`` array, the sample is
    broadcast against every row of ``train`` and the per-row Euclidean
    norms are written into the matching row of the result.

    Returns:
        A ``(test_count, train_count)`` float32 distance matrix.
    """
    distances = np.empty((test_count, train_count), dtype=np.float32)
    # Iterating a 2-D array yields views of its rows, so assigning through
    # the view writes straight into `distances`.
    for row, out_row in enumerate(distances):
        offsets = test[row] - train  # (attr_count,) against (train_count, attr_count)
        out_row[...] = np.linalg.norm(offsets, axis=1)
    return distances
def three(test_samples=None, train_samples=None):
    """Return all pairwise Euclidean distances using one matrix product.

    Expands ``||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b`` so that every
    cross term ``a.b`` comes from a single ``queries @ references.T``
    multiplication instead of a Python-level loop.

    Args:
        test_samples: Optional 2-D array-like with one sample per row.
            When omitted, the module-level ``test`` array is used.
        train_samples: Optional 2-D array-like with the same number of
            columns as ``test_samples``. When omitted, the module-level
            ``train`` array is used.

    Returns:
        A float64 array with one row per test sample and one column per
        train sample; entry ``[i, j]`` is the distance between test sample
        ``i`` and train sample ``j``.
    """
    queries = np.asarray(
        test if test_samples is None else test_samples, dtype=np.float64
    )
    references = np.asarray(
        train if train_samples is None else train_samples, dtype=np.float64
    )

    # Sum the squares directly. Going through norm(...) ** 2 takes a square
    # root and then squares it again, which is not exact: for [1, 1, 1] it
    # yields 2.9999999999999996 rather than 3.
    query_sq = np.sum(queries * queries, axis=1)[:, np.newaxis]
    reference_sq = np.sum(references * references, axis=1)[np.newaxis, :]
    cross = queries @ references.T

    squared = query_sq + reference_sq - 2.0 * cross
    # Rounding can leave a tiny negative value where the true squared
    # distance is zero; clamp it so the square root never produces NaN.
    np.maximum(squared, 0.0, out=squared)
    return np.sqrt(squared)
# Print the distance matrix from each implementation, from the fully looped
# version to the matrix-product version, so the results can be compared.
for compute_distances in (one, two, three):
    print(compute_distances())