KNN自编程实现和sklearn实现

最新推荐文章于 2024-03-13 11:30:51 发布

AI_Younger_Man

最新推荐文章于 2024-03-13 11:30:51 发布

阅读量157

点赞数

分类专栏： # 机器学习 Machine Learning 文章标签： python

本文链接：https://blog.csdn.net/qq_38888209/article/details/105909232

版权

机器学习 Machine Learning 专栏收录该内容

31 篇文章 4 订阅

订阅专栏

在这里插入图片描述

# import math
# import sklearn
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# from sklearn import datasets
# from sklearn.model_selection import train_test_split
# from sklearn.model_selection import cross_val_predict
# from collections import Counter
#
#
# def dist(x1, x2, p=2):  # p=1曼哈顿距离 p=2是欧氏距离 p=3...默认2
#     if len(x1) == len(x2) and len(x1) > 1:
#         my_sum = 0
#         for i in range(0, len(x1), 1):
#             my_sum += math.pow(abs(x1[i] - x2[i]), p)  # 注意这里的绝对值，不加的话会报错
#         ret = math.pow(my_sum, 1/p)
#         return ret
#     return 0
#
#
# def homework():
#     a = [1, 1]
#     b = [5, 1]
#     c = [4, 4]
#
#     for p in range(1, 6, 1):
#         myresu = [dist(a, b, p), dist(a, c, p)]
#         print("p为：" + str(p) + ", a到b, c的距离是：%f %f" % (myresu[0], myresu[1]))
#
#
# def feature_program():
#     iris = sklearn.datasets.load_iris()
#     df = pd.DataFrame(iris.data, columns=iris.feature_names)
#     df['label'] = iris.target
#     df.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']
#     # print(df.head(10))
#     # plt.scatter(df[:50]['sepal length'], df[:50]['sepal width'], label='0')
#     # plt.scatter(df[50:100]['sepal length'], df[50:100]['sepal width'], label='1')
#     # plt.xlabel('sepal length')
#     # plt.ylabel('sepal width')
#     # plt.show()
#     # plt.legend()
#
#     data = np.array(df.iloc[:100, [0, 1, -1]])
#     x, y = data[:, :-1], data[:, -1]
#     x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
#
#
#
# class KNN:
#     def __init__(self, X_train, y_train, n_neighbors=3, p=2):
#         """
#         parameter: n_neighbors 临近点个数
#         parameter: p 距离度量
#         """
#         self.n = n_neighbors
#         self.p = p
#         self.X_train = X_train
#         self.y_train = y_train
#
#     def predict(self, X):
#         # 取出n个点
#         knn_list = []
#
#         for i in range(self.n):
#             # 调用库函数求向量的p范数：指定向量到训练集中每一个点的距离
#             # 用元组保存起来：（距离, 分类）
#             dist = np.linalg.norm(X - self.X_train[i], ord=self.p)
#             knn_list.append((dist, self.y_train[i]))
#
#         for i in range(self.n, len(self.X_train)):
#             #
#             max_index = knn_list.index(max(knn_list, key=lambda x: x[0]))
#             dist = np.linalg.norm(X - self.X_train[i], ord=self.p)
#             if knn_list[max_index][0] > dist:
#                 knn_list[max_index] = (dist, self.y_train[i])
#
#         # 统计
#         knn = [k[-1] for k in knn_list]
#         count_pairs = Counter(knn)
# #         max_count = sorted(count_pairs, key=lambda x: x)[-1]
#         max_count = sorted(count_pairs.items(), key=lambda x: x[1])[-1][0]
#         return max_count
#
#     def score(self, X_test, y_test):
#         right_count = 0
#         n = 10
#         for X, y in zip(X_test, y_test):
#             label = self.predict(X)
#             if label == y:
#                 right_count += 1
#         return right_count / len(X_test)
#
#
#
#
#

import math
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split


def dist(x, y, p=2):
    """Return the Minkowski (L_p) distance between two points.

    Args:
        x: Sequence of numeric coordinates.
        y: Sequence of numeric coordinates with the same length as ``x``.
        p: Order of the norm. 1 gives the Manhattan distance, 2 (the
            default) the Euclidean distance and ``float("inf")`` the
            Chebyshev distance (largest coordinate difference).

    Returns:
        The distance as a float; 0.0 for two empty points.

    Raises:
        ValueError: If ``x`` and ``y`` do not have the same length.
    """
    if len(x) != len(y):
        # Pairing the coordinates would otherwise silently drop the
        # surplus ones and return a distance in the wrong space.
        raise ValueError(
            "x and y must have the same length, got %d and %d"
            % (len(x), len(y))
        )

    if p == float("inf"):
        # Limit of the L_p norm for p -> infinity; the power formula below
        # would degenerate to inf ** 0 == 1.0.
        return float(max((abs(a - b) for a, b in zip(x, y)), default=0.0))

    my_sum = 0
    for a, b in zip(x, y):
        # abs() keeps the base non-negative so fractional or odd powers
        # stay well defined.
        my_sum += math.pow(abs(a - b), p)
    return math.pow(my_sum, 1 / p)


def exanple_3_1():
    """Print L_p distances from the point (1, 1) for p = 2, 3, 4 and 5.

    For every p four lines are written to stdout: p itself, the distance
    from (1, 1) to (5, 1), the distance from (1, 1) to (4, 4), and an
    empty separator line.
    """
    origin = [1, 1]
    targets = ([5, 1], [4, 4])

    for order in (2, 3, 4, 5):
        print(order)
        for target in targets:
            print(dist(origin, target, order))
        print()


# Load the iris data set and wrap it in a DataFrame: one column per feature
# name, plus a 'target' column holding the class label of each row.
iris = datasets.load_iris()
df = pd.DataFrame(data=iris['data'], columns=iris['feature_names'])
df['target'] = iris['target']

# Scatter sepal length against sepal width, one series per 50-row slice.
# NOTE(review): the labels '0'/'1'/'2' assume the rows are ordered by class
# in blocks of 50 — confirm against the data set documentation.
plt.scatter(df[0:50]['sepal length (cm)'], df[0:50]['sepal width (cm)'], label='0')
plt.scatter(df[50:100]['sepal length (cm)'], df[50:100]['sepal width (cm)'], label='1')
plt.scatter(df[100:150]['sepal length (cm)'], df[100:150]['sepal width (cm)'], label='2')
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.legend()
plt.show()

# Features: the first two columns of df (presumably sepal length and sepal
# width, matching the axes plotted above). Labels: the last column, i.e. the
# 'target' column appended above.
x = np.array(df.iloc[:, 0:2])
y = np.array(df.iloc[:, -1])

# Hold out 30% of the samples for testing. No random_state is passed, so the
# split (and any score computed from it) presumably changes from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)


class KNN:
    """Brute-force k-nearest-neighbours classifier.

    A query point is compared with every stored training point using the
    vector p-norm of their difference; the ``neighbor`` closest training
    points then vote, and the most frequent label wins.

    Attributes:
        x_train: Training points, as passed to the constructor.
        y_train: Training labels, as passed to the constructor.
        neighbor: Number of nearest neighbours that take part in the vote.
        p: Order of the norm handed to ``np.linalg.norm``.
        diction: List of ``(point, label)`` pairs built from the training
            data.
    """

    def __init__(self, x_train, y_train, neighbor=5, p=2):
        """Store the labelled training data.

        Args:
            x_train: Sequence of training points (e.g. a 2-D array with one
                point per row).
            y_train: Sequence of labels, one per training point. Labels may
                be any hashable values.
            neighbor: Number of nearest neighbours used for the vote.
            p: Order of the norm used as the distance (2 = Euclidean).

        Raises:
            ValueError: If ``x_train`` and ``y_train`` differ in length or
                ``neighbor`` is smaller than 1.
        """
        if len(x_train) != len(y_train):
            raise ValueError(
                "x_train and y_train must have the same length, got %d and %d"
                % (len(x_train), len(y_train))
            )
        if neighbor < 1:
            raise ValueError("neighbor must be at least 1, got %r" % (neighbor,))

        self.x_train = x_train
        self.y_train = y_train
        self.neighbor = neighbor
        self.p = p
        # [(point, label), ...] — one entry per training sample.
        self.diction = list(zip(x_train, y_train))

    def fit(self, x):
        """Predict the label of a single query point.

        Despite its name this method performs no training: it classifies
        ``x`` by majority vote among its nearest training points.

        Args:
            x: Query point; must support ``x - training_point``.

        Returns:
            The most frequent label among the ``neighbor`` nearest training
            points (all of them if fewer are stored). When several labels
            are equally frequent, the one whose closest representative is
            nearest to ``x`` is returned.

        Raises:
            ValueError: If no training points are stored.
        """
        if not self.diction:
            raise ValueError("cannot classify without training points")

        # (distance, label) for every training point.
        scored = [
            (np.linalg.norm(x - point, ord=self.p), label)
            for point, label in self.diction
        ]
        # Sort on the distance only; the sort is stable, so equidistant
        # points keep their training order and labels are never compared.
        scored.sort(key=lambda pair: pair[0])

        # The vote size is the neighbour count, not the norm order. Slicing
        # also copes with fewer stored points than requested neighbours.
        votes = Counter(label for _, label in scored[:self.neighbor])

        # Counter keeps first-seen order and max() returns the first maximal
        # item, which yields the nearest-first tie-break described above.
        return max(votes, key=votes.get)

    def test(self, x_test, y_test):
        """Return the fraction of test points that are classified correctly.

        Args:
            x_test: Sequence of query points.
            y_test: Sequence of expected labels, paired with ``x_test``.

        Returns:
            Number of correct predictions divided by ``len(x_test)``.

        Raises:
            ZeroDivisionError: If ``x_test`` is empty.
        """
        count = sum(
            1 for point, label in zip(x_test, y_test)
            if self.fit(point) == label
        )
        return count / len(x_test)


# Wrap the split arrays in DataFrames so their columns can be picked by
# integer column label (0 and 1) below.
df1 = pd.DataFrame(data=x_train)
df2 = pd.DataFrame(data=x_test)
# Plot column 0 against column 1 for the training and the test points so the
# two subsets can be compared visually.
plt.scatter(df1[:][0], df1[:][1], label='train')
plt.scatter(df2[:][0], df2[:][1], label='test')
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.legend()
plt.show()

# Build the classifier on the training split and print the fraction of test
# points whose predicted label equals the expected one.
clf = KNN(x_train, y_train, neighbor=5, p=2)
print(clf.test(x_test, y_test))

在这里插入图片描述

AI_Younger_Man

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
KNN自编程实现和sklearn实现

# import math# import sklearn# import numpy as np# import pandas as pd# import matplotlib.pyplot as plt# from sklearn import datasets# from sklearn.model_selection import train_test_split# f...
复制链接

扫一扫