1、简而言之:根据你的邻居来推断你的类别
2、距离公式(确定邻居)
3、K值取得过小,容易受到异常点的影响
K值取得过大,容易受到样本不均衡的影响
4、API
n_neighbors即K值
5、总结
ex_1
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
def knn_iris(n_neighbors=3, random_state=6):
    """Classify the iris dataset with a k-nearest-neighbours model.

    Loads the iris data, splits it into training and test subsets,
    standardizes the features, fits a ``KNeighborsClassifier`` and prints
    the predictions, their element-wise comparison with the true labels,
    and the accuracy on the test subset.

    :param n_neighbors: number of neighbours (K) handed to the classifier.
        Defaults to 3, the value that used to be hard-coded.
    :param random_state: seed handed to ``train_test_split``.
        Defaults to 6, the value that used to be hard-coded.
    :return: the value of ``estimator.score`` on the test subset.
    """
    # 1) Load the data.
    iris = load_iris()

    # 2) Split into training and test subsets.
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=random_state
    )

    # 3) Feature engineering: standardization.
    # The scaler is fitted on the training subset only; the test subset is
    # transformed with those same fitted statistics.
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4) KNN estimator.
    estimator = KNeighborsClassifier(n_neighbors=n_neighbors)
    estimator.fit(x_train, y_train)

    # 5) Model evaluation.
    # Method 1: compare predictions with the true labels directly.
    y_predict = estimator.predict(x_test)
    print("y_predict: \n", y_predict)
    print("直接对比真实值和预测值: \n", y_test == y_predict)

    # Method 2: compute the accuracy.
    score = estimator.score(x_test, y_test)
    print("准确率:\n", score)

    # Hand the accuracy back so callers can compare different settings.
    return score
# Run the iris KNN example only when this file is executed as a script.
if __name__ == "__main__":
    knn_iris()