kNN Algorithm Implementation

kNN Algorithm

import numpy as np
import matplotlib.pyplot as plt
raw_data_X = [[3.4, 2.3],
              [3.1, 1.8],
              [1.3, 3.4],
              [3.6, 4.7],
              [2.3, 2.9],
              [7.4, 4.7],
              [5.7, 3.5],
              [9.2, 2.5],
              [7.8, 3.4],
              [7.9, 0.8],
             ]
raw_data_y = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
X_train = np.array(raw_data_X)
y_train = np.array(raw_data_y)
X_train
array([[3.4, 2.3],
       [3.1, 1.8],
       [1.3, 3.4],
       [3.6, 4.7],
       [2.3, 2.9],
       [7.4, 4.7],
       [5.7, 3.5],
       [9.2, 2.5],
       [7.8, 3.4],
       [7.9, 0.8]])
y_train
array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
# the point to be predicted
x = np.array([8.1, 3.4])
plt.scatter(X_train[y_train == 0, 0], X_train[y_train == 0, 1], color='g')
plt.scatter(X_train[y_train == 1, 0], X_train[y_train == 1, 1], color='r')
plt.scatter(x[0], x[1], color='b')
<matplotlib.collections.PathCollection at 0x20ba3c947b8>

[Figure: scatter plot of the two classes (green and red) and the point to be predicted (blue)]

1. The kNN Implementation Process

  • Euclidean distance
    $Dist = \sqrt{\sum_{i=1}^{n} \left(X_i^{(a)} - X_i^{(b)}\right)^2}$
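    For example, plugging in the query point (8.1, 3.4) and the training sample (7.8, 3.4) gives $\sqrt{(8.1 - 7.8)^2 + (3.4 - 3.4)^2} = 0.3$, which matches the smallest value in the distances list computed below.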
from math import sqrt

distances = []
for x_train in X_train:
    # compute the Euclidean distance from this training sample to x
    d = sqrt(np.sum((x_train - x) ** 2))
    distances.append(d)

# equivalently, as a list comprehension:
# distances = [sqrt(np.sum((x_train - x) ** 2)) for x_train in X_train]
distances
[4.827007354458868,
 5.2497618993626745,
 6.8,
 4.684015371452148,
 5.821511831131154,
 1.4764823060233399,
 2.4020824298928622,
 1.4212670403551892,
 0.2999999999999998,
 2.607680962081059]
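As an aside, the same distances can be computed in a single vectorized step with NumPy broadcasting; this is just a minimal sketch and not part of the original notebook:

# vectorized alternative: subtract x from every row of X_train via broadcasting,
# then take the row-wise Euclidean norm of each difference
distances_vec = np.linalg.norm(X_train - x, axis=1)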
nearest = np.argsort(distances)
nearest
array([8, 7, 5, 6, 9, 3, 0, 1, 4, 2], dtype=int64)
# find the k nearest points
k = 6
topK_y = [y_train[i] for i in nearest[:k]]
topK_y
[1, 1, 1, 1, 1, 0]
from collections import Counter
# count the class labels among the k nearest points
Counter(topK_y)
Counter({1: 5, 0: 1})
votes = Counter(topK_y)
# return the most common elements; here we ask for the top 1
votes.most_common(1)
[(1, 5)]
# the final prediction
predict_y = votes.most_common(1)[0][0]
predict_y
1
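Putting the steps above together, the whole manual procedure fits into one small function. This is a minimal sketch (the name kNN_classify and its signature are my own, not from the original notebook):

def kNN_classify(k, X_train, y_train, x):
    # distance from x to every training sample
    distances = [sqrt(np.sum((x_train - x) ** 2)) for x_train in X_train]
    # indices of the training samples sorted by distance
    nearest = np.argsort(distances)
    # labels of the k nearest samples
    topK_y = [y_train[i] for i in nearest[:k]]
    # majority vote among those labels
    votes = Counter(topK_y)
    return votes.most_common(1)[0][0]

kNN_classify(6, X_train, y_train, x)
# should reproduce the prediction above: 1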

2. Using kNN from scikit-learn

from sklearn.neighbors import KNeighborsClassifier
KNN_classifier = KNeighborsClassifier(n_neighbors=6)
KNN_classifier.fit(X_train, y_train)
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=6, p=2,
           weights='uniform')
# in sklearn, predict expects a 2D array of samples
X_predict = x.reshape(1, -1)
X_predict
array([[8.1, 3.4]])
KNN_classifier.predict(X_predict)
array([1])
y_predict = KNN_classifier.predict(X_predict)
y_predict[0]
1
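As a quick sanity check (my addition, not from the original notebook), KNeighborsClassifier also exposes a score method that returns the mean accuracy; on this tiny, well-separated toy set it simply re-classifies the training points:

# mean accuracy on the training data itself -- only a sanity check here
KNN_classifier.score(X_train, y_train)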

3. Reorganizing Our kNN Code

from kNN.kNN import KNNClassifier
knn_clf = KNNClassifier(k=6)
knn_clf.fit(X_train, y_train)
KNN(k=6)
y_predict = knn_clf.predict(X_predict)
y_predict
array([1])

KNNClassifier.py

import numpy as np
from math import sqrt
from collections import Counter
from metrics import accuracy_score

class KNNClassifier:
    def __init__(self, k):
        assert k >= 1, "k must be valid"
        self.k = k
        self._X_train = None
        self._y_train = None

    def fit(self, X_train, y_train):
        """根据训练数据集X_train和y_train训练kNN分类器"""
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"
        assert self.k <= X_train.shape[0], \
            "the size of X_train must be at least k."

        self._X_train = X_train
        self._y_train = y_train
        return self

    def predict(self, X_predict):
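        """Given the data set X_predict to be predicted, return the vector of predicted labels"""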
        assert self._X_train is not None and self._y_train is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == self._X_train.shape[1], \
            "the feature number of X_predict must be equal to X_train"

        y_predict = [self._predict(x_predict) for x_predict in X_predict]
        return np.array(y_predict)

    def _predict(self, x_predict):
        """给定单个待预测数据x,返回x的预测结果值"""
        assert x_predict.shape[0] == self._X_train.shape[1], \
            "the feature number of x must be equal to X_train"

        dist = [sqrt(np.sum((x_train - x_predict) ** 2)) for x_train in self._X_train]
        nearest = np.argsort(dist)
        top_K = [self._y_train[i] for i in nearest[:self.k]]
        votes = Counter(top_K)
        return votes.most_common(1)[0][0]

    def score(self, X_test, y_test):
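        """Return the accuracy of the classifier on the test set X_test, y_test"""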
        y_predict = self.predict(X_test)
        return accuracy_score(y_test, y_predict)

    def __repr__(self):
        return "KNN(k=%d)" % self.k

metrics.py

import numpy as np

def accuracy_score(y_true, y_predict):
    """计算 y_true 和 y_predict 之间的准确率"""
    assert y_true.shape[0] == y_predict.shape[0], \
        "the size of y_true must be equal to the size of y_predict"

    return sum(y_true == y_predict) / len(y_true)
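For completeness, a tiny usage sketch of accuracy_score itself (the arrays below are illustrative only, not data from the notebook):

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])
accuracy_score(y_true, y_pred)   # 0.75 -- three of the four labels match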

Project source code
