Implementing the KNN algorithm in Python, using the iris dataset as an example

#  KNN for iris data set with cross validation
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.model_selection import train_test_split,cross_val_score
import matplotlib.pyplot as plt
import numpy as np

# Load the dataset: 150 samples, 4 features, 3 classes
feature, flowerC = load_iris(return_X_y=True)
print(feature)
print(flowerC)
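
The raw prints above dump the full arrays; the shapes and class names are easier to read at a glance. A minimal sketch, reusing load_iris without return_X_y (which exposes target_names):

# Optional: summarize the dataset instead of dumping the raw arrays
iris = load_iris()
print('feature shape:', feature.shape)      # (150, 4)
print('class names:', iris.target_names)    # ['setosa' 'versicolor' 'virginica']
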
### -------------------------------- Data reduction --------------------------------

# Use PCA to reduce the features from 4 to 2 dimensions
pca = PCA(n_components=2)
feature_reduced = pca.fit_transform(feature)
# print(feature_reduced)
# print("Singular values are:", pca.singular_values_)
# print("Ratio of explained variance are:", pca.explained_variance_ratio_)

plt.figure(figsize=(10, 6))
labels=['setosa', 'versicolor', 'virginica']
s = plt.scatter(feature_reduced[:, 0], feature_reduced[:, 1], 
                c=flowerC, marker='o', cmap='rainbow', alpha = 0.6)
plt.xlabel('pc1')
plt.ylabel('pc2')
plt.legend(handles = s.legend_elements()[0], labels=labels)
plt.title('Iris')
plt.show()
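
Before relying on the 2-D projection, it is worth checking how much of the original variance the two components keep; for iris the first two principal components retain roughly 97-98%. A short check using the pca object fitted above:

# Check how much variance the 2-D projection preserves
explained = pca.explained_variance_ratio_
print('PC1 + PC2 explain %.1f%% of the total variance' % (100 * explained.sum()))
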

### ---------------------------- Split data into Train/test sets ----------------------

test_size = 0.2
train_feature, test_feature, train_flowerC, test_flowerC = train_test_split(
    feature_reduced, flowerC, test_size=test_size, random_state=3)
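
With only 150 samples, a purely random split can leave the three classes unevenly represented. train_test_split also accepts a stratify argument that preserves the class proportions in both sets; an optional variant of the split above (the variable names are illustrative, and the rest of the code keeps the unstratified split):

# Optional: stratified split to preserve the 50/50/50 class balance
train_f, test_f, train_c, test_c = train_test_split(
    feature_reduced, flowerC, test_size=test_size,
    random_state=3, stratify=flowerC)
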

### ------------------------------ Find the best choice of K for KNN --------------------------
#  Use the training set only!

# KNN (k from 1 to 30) with 6-fold cross validation 
K = 30
cv = 6  # split the training set into 6 folds: each round trains on 5 folds and validates on the 6th (5- or 10-fold is also common)
k_range = range(1, K+1)
k_error = []

# iterate over k from 1 to 30
for k in k_range:
    knn = KNN(n_neighbors=k)
    # 6-fold cross validation: the 120 training samples are split into 6 folds; each round trains on 100 samples and validates on the remaining 20
    scores = cross_val_score(knn, train_feature, train_flowerC, cv=cv, scoring='accuracy')
#     print('Accuracy for each fold of cross validation:', scores)
    k_error.append(1 - scores.mean())
#     print('Validation error for k =' + str(k) + ':', 1 - scores.mean())

k_best = k_error.index(min(k_error)) + 1
print('----------------------------------')
print('The best k for KNN is:', k_best)

plt.figure(figsize=(10, 6))
plt.plot(k_range, k_error, 'g*-', alpha = 0.6)
plt.annotate('Best K', xy=(k_best, min(k_error)), xytext=(k_best+2, min(k_error)*1.2), 
             fontsize= 12, color= 'red', arrowprops=dict(color='black', shrink=0.04))
plt.title('Cross-validation error for KNN (6-fold)')
plt.xlabel('Number of nearest neighbors K')
plt.ylabel('Cross-validation error')
plt.show()
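
The manual loop above makes the per-K errors easy to plot, but the same search can be done in one call with GridSearchCV; a sketch under the same settings (6-fold CV, accuracy scoring):

# Equivalent K search with GridSearchCV
from sklearn.model_selection import GridSearchCV
grid = GridSearchCV(KNN(), {'n_neighbors': list(k_range)}, cv=cv, scoring='accuracy')
grid.fit(train_feature, train_flowerC)
print('GridSearchCV best k:', grid.best_params_['n_neighbors'])
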

### ------------------------ Test KNN with best K ---------------------------------
#  Train KNN on the training set, then evaluate it on the held-out test set

best_knn = KNN(n_neighbors=k_best) 
best_knn.fit(train_feature, train_flowerC)  # fit = train the classifier on the training set
test_predictC = best_knn.predict(test_feature)  # predict class labels for the unseen test samples
# print(best_knn.predict_proba(test_feature)) # predict probability

# plot the full dataset (faded circles), the training set (triangles), and the test predictions (stars)
plt.figure(figsize=(10, 6))
labels=['setosa', 'versicolor', 'virginica']
s = plt.scatter(feature_reduced[:, 0], feature_reduced[:, 1], 
                c=flowerC, s=220, marker='o', cmap='rainbow', alpha = 0.3)

s = plt.scatter(train_feature[:, 0], train_feature[:, 1], 
                c=train_flowerC, marker='^', cmap='rainbow', alpha = 1)

s = plt.scatter(test_feature[:, 0], test_feature[:, 1], 
                c=test_predictC, marker='*', cmap='rainbow', alpha = 1)
plt.xlabel('pc1')
plt.ylabel('pc2')
plt.legend(handles = s.legend_elements()[0], labels=labels)
plt.title('Classification results with KNN')
plt.show()
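
The scatter plot shows the predicted labels but not how many of them are correct. sklearn.metrics quantifies the test-set performance; a short sketch:

# Quantify test-set performance with accuracy and a confusion matrix
from sklearn.metrics import accuracy_score, confusion_matrix
print('Test accuracy:', accuracy_score(test_flowerC, test_predictC))
print('Confusion matrix:\n', confusion_matrix(test_flowerC, test_predictC))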

 

 
