import numpy as np
def load_data(filename):
    """Load a label-first CSV file of integers.

    Every non-blank line is split on commas. The first field is parsed as
    the integer label and the remaining fields as the integer feature
    vector of that row.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(data, label)`` tuple: ``data`` is a list with one list of ints
        per row, ``label`` is the list of the matching int labels.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as an integer.
    """
    data, label = [], []
    # The context manager closes the handle even when parsing raises.
    with open(filename, 'r') as fr:
        # Iterate the file lazily instead of materialising every line first.
        for line in fr:
            line = line.strip()
            if not line:
                # Blank lines (e.g. trailing newlines) carry no sample.
                continue
            fields = line.split(',')
            data.append([int(num) for num in fields[1:]])
            label.append(int(fields[0]))
    return data, label
def predict(train_data, train_label, test_data, rank):
    """Classify test samples by majority vote of their nearest neighbours.

    For every row of ``test_data`` the Euclidean distance to every row of
    ``train_data`` is evaluated, the ``rank`` closest training rows are
    selected and the most frequent label among them is predicted. When
    several labels share the highest vote count, the smallest label wins.
    A progress line is printed after each classified sample.

    Args:
        train_data: Sequence of equal-length numeric feature vectors.
        train_label: Labels aligned index-by-index with ``train_data``.
            Any hashable, orderable values work (not limited to 0-9).
        test_data: Sized sequence of feature vectors to classify.
        rank: Number of neighbours that vote (the "k" of k-NN); must be
            at least 1.

    Returns:
        List with one predicted label per test sample, in input order.

    Raises:
        ValueError: If ``rank`` is smaller than 1, or if there are test
            samples but no training samples to compare against.
    """
    if rank < 1:
        raise ValueError(
            'rank must be a positive integer, got {!r}'.format(rank))

    total = len(test_data)
    if total == 0:
        return []

    # Convert the training set once; float64 avoids wrap-around if the
    # caller hands in small unsigned integer arrays.
    train_matrix = np.asarray(train_data, dtype=np.float64)
    if train_matrix.shape[0] == 0:
        raise ValueError('train_data must contain at least one sample')

    y_hat = []
    for num, sample in enumerate(test_data, start=1):
        diff = train_matrix - np.asarray(sample, dtype=np.float64)
        # Squared distances rank identically to true Euclidean distances,
        # so the square root is not needed for neighbour selection.
        sq_dis = np.sum(diff * diff, axis=1)
        # A stable sort resolves equal distances by training-set order.
        nearest = np.argsort(sq_dis, kind='stable')[:rank]

        # Tally votes per label without assuming a fixed number of classes.
        votes = {}
        for index in nearest:
            voted = train_label[index]
            votes[voted] = votes.get(voted, 0) + 1
        top = max(votes.values())
        y_hat.append(min(lab for lab, cnt in votes.items() if cnt == top))

        print('finish {}/{}.'.format(num, total))
    return y_hat
if __name__ == '__main__':
    # Read the training set and the test set from the MNIST CSV files.
    train_data, train_label = load_data('../data/mnist_train.csv')
    test_data, test_label = load_data('../data/mnist_test.csv')

    # Only the first 200 test samples are evaluated.
    test_data = test_data[:200]
    test_label = test_label[:200]

    # Classify with the 10 nearest neighbours.
    y_hat = predict(train_data, train_label, test_data, 10)

    # Count mismatches between the true and the predicted labels.
    error = sum(
        1 for expected, guessed in zip(test_label, y_hat)
        if expected != guessed
    )
    print('acc: {}%'.format(100 - error / len(y_hat) * 100))
# 02. k-nearest neighbors (k-NN) model
# Latest recommended article published 2024-03-31 11:27:21