2020-10-17

最新推荐文章于 2024-08-07 21:18:06 发布

媛在路上

最新推荐文章于 2024-08-07 21:18:06 发布

阅读量100

点赞数

文章标签：机器学习

本文链接：https://blog.csdn.net/qq_44723595/article/details/109137627

版权

python之鸢尾花–KNN

方案一：使用 sklearn.neighbors 模块中的 KNeighborsClassifier 类

import numpy as np
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier

# Load the iris dataset and pull out the feature matrix and the label vector.
iris = datasets.load_iris()
X, y = iris.data, iris.target


# Split the data into a training part and a test part.
def train_test_split(X, y, test_radio, seed=None):
    """Randomly split ``X`` and ``y`` into test and training subsets.

    Args:
        X: NumPy array of samples; rows are selected by integer-array
            indexing along the first axis.
        y: NumPy array of labels with the same first dimension as ``X``.
        test_radio: Fraction of the samples (between 0.0 and 1.0) placed in
            the test subset. The test size is ``int(len(X) * test_radio)``,
            i.e. rounded down.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            global NumPy random state is used.

    Returns:
        A tuple ``(X_test, X_train, y_test, y_train)``. Note that the test
        subsets come first in each pair.

    Raises:
        AssertionError: If ``X`` and ``y`` differ in length or ``test_radio``
            lies outside ``[0.0, 1.0]``.
    """
    assert X.shape[0] == y.shape[0], \
        "the size of X must be equal to the size of y"
    assert 0.0 <= test_radio <= 1.0, \
        "test_radio must between 0.0 and 1"

    # A private RandomState yields exactly the permutation that
    # np.random.seed(seed) followed by np.random.permutation would, but it
    # does not reseed the process-wide generator as a hidden side effect.
    rng = np.random if seed is None else np.random.RandomState(seed)

    n_samples = len(X)
    test_size = int(n_samples * test_radio)
    shuffle_index = rng.permutation(n_samples)
    test_index = shuffle_index[:test_size]
    train_index = shuffle_index[test_size:]
    return X[test_index], X[train_index], y[test_index], y[train_index]


# Hold out 20% of the samples for evaluation, using a fixed seed.
# The helper returns the test subsets first: (X_test, X_train, y_test, y_train).
X_test, X_train, y_test, y_train = train_test_split(
    X,
    y,
    test_radio=0.2,
    seed=656,
)

# KNN: fit scikit-learn's classifier with 7 neighbours and predict the test set.
skl_knn = KNeighborsClassifier(n_neighbors=7)
skl_knn.fit(X_train, y_train)
skl_predict = skl_knn.predict(X_test)

# Fraction of test samples whose predicted label equals the true label.
precision = (skl_predict == y_test).sum() / len(y_test)

# Keys of the loaded dataset object, for reference:
# dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])
print(iris.feature_names)
print(X_test)
print(f'正确率为：{precision}')


![在这里插入图片描述](https://img-blog.csdnimg.cn/20201017202032458.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzQ0NzIzNTk1,size_16,color_FFFFFF,t_70#pic_center)

方案二：自定义knn

from collections import Counter
from math import sqrt

import numpy as np
from sklearn import datasets

# Load the iris dataset and pull out the feature matrix and the label vector.
iris = datasets.load_iris()
X, y = iris.data, iris.target


class My_KNN_classifier:
    """Minimal k-nearest-neighbours classifier using Euclidean distance.

    Each sample is assigned the most common label among the ``k`` training
    samples closest to it. Input validation uses ``assert`` statements, so
    violations surface as ``AssertionError``.
    """

    def __init__(self, k):
        """Create an unfitted classifier.

        Args:
            k: Number of neighbours that vote; must be an odd integer >= 1.

        Raises:
            AssertionError: If ``k`` is smaller than 1 or even.
        """
        assert k >= 1 and k % 2 != 0, \
            "k必须是一个大于等于1，并且不为偶数的数"
        self.k = k
        self._X_train = None
        self._y_train = None

    def fit(self, X, y):
        """Store the training data.

        Args:
            X: 2-D array-like of training samples, one row per sample.
            y: 1-D array-like of labels, one per row of ``X``.

        Returns:
            The classifier itself, so calls can be chained.

        Raises:
            AssertionError: If ``X`` is not 2-D, if ``X`` and ``y`` differ in
                length, or if ``k`` exceeds the number of training samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        assert X.ndim == 2, \
            'X.ndim == 2'
        assert X.shape[0] == y.shape[0], \
            "特征与所属类别的数据大小必须一一对应"
        # Validate against the data actually passed in, not a module-level
        # variable that happens to be called X_train.
        assert self.k <= X.shape[0], \
            'k必须小于等于X.shape[0]'
        self._X_train = X
        self._y_train = y
        return self

    def predict(self, X_predict):
        """Predict a label for every row of ``X_predict``.

        Args:
            X_predict: 2-D array-like with the same number of columns as the
                training data.

        Returns:
            A NumPy array holding one predicted label per input row.

        Raises:
            AssertionError: If the classifier has not been fitted, if
                ``X_predict`` is not 2-D, or if its column count differs from
                the training data.
        """
        assert self._X_train is not None and self._y_train is not None, \
            "predict之前必须先调用fit"
        assert self._X_train.shape[0] == self._y_train.shape[0], \
            "特征与所属类别的数据大小必须一一对应"
        X_predict = np.asarray(X_predict)
        assert X_predict.ndim == 2, \
            'X_predict.ndim == 2'
        assert X_predict.shape[1] == self._X_train.shape[1], \
            "X_predict的特征数必须与训练数据一致"
        y_predict = [self._predict(x) for x in X_predict]
        return np.array(y_predict)

    def _predict(self, x):
        """Return the majority label among the ``k`` training rows nearest ``x``."""
        # Euclidean distance from x to every training row in one vectorised step.
        distance = np.sqrt(np.sum((self._X_train - x) ** 2, axis=1))
        nearest = np.argsort(distance)[:self.k]
        topK_y = self._y_train[nearest]
        votes = Counter(topK_y)
        return votes.most_common(1)[0][0]


# Split the data into a training part and a test part.
def train_test_split(X, y, test_radio, seed=None):
    """Randomly split ``X`` and ``y`` into test and training subsets.

    Args:
        X: NumPy array of samples; rows are selected by integer-array
            indexing along the first axis.
        y: NumPy array of labels with the same first dimension as ``X``.
        test_radio: Fraction of the samples (between 0.0 and 1.0) placed in
            the test subset. The test size is ``int(len(X) * test_radio)``,
            i.e. rounded down.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            global NumPy random state is used.

    Returns:
        A tuple ``(X_test, X_train, y_test, y_train)``. Note that the test
        subsets come first in each pair.

    Raises:
        AssertionError: If ``X`` and ``y`` differ in length or ``test_radio``
            lies outside ``[0.0, 1.0]``.
    """
    assert X.shape[0] == y.shape[0], \
        "the size of X must be equal to the size of y"
    assert 0.0 <= test_radio <= 1.0, \
        "test_radio must between 0.0 and 1"

    # A private RandomState yields exactly the permutation that
    # np.random.seed(seed) followed by np.random.permutation would, but it
    # does not reseed the process-wide generator as a hidden side effect.
    rng = np.random if seed is None else np.random.RandomState(seed)

    n_samples = len(X)
    test_size = int(n_samples * test_radio)
    shuffle_index = rng.permutation(n_samples)
    test_index = shuffle_index[:test_size]
    train_index = shuffle_index[test_size:]
    return X[test_index], X[train_index], y[test_index], y[train_index]


# Hold out 20% of the samples for evaluation, using a fixed seed.
# The helper returns the test subsets first: (X_test, X_train, y_test, y_train).
X_test, X_train, y_test, y_train = train_test_split(
    X,
    y,
    test_radio=0.2,
    seed=656,
)

# KNN: fit the hand-written classifier with 9 neighbours and predict the test set.
my_knn = My_KNN_classifier(k=9)
my_knn.fit(X_train, y_train)
my_predict = my_knn.predict(X_test)

# Fraction of test samples whose predicted label equals the true label.
precision = (my_predict == y_test).sum() / len(y_test)

print(iris.feature_names)
print(X_test)
print(f'正确率为：{precision}')

媛在路上

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
2
评论
2020-10-17

python之鸢尾花–KNN方案一：引用sklearn.neighbors import KNeighborsClassifier库import numpy as npfrom sklearn import datasetsfrom sklearn.neighbors import KNeighborsClassifieriris = datasets.load_iris()X = iris.datay = iris.target# 分出训练数据和测试数据def train_test
复制链接

扫一扫