手写KNN代码实现案例分类
上一篇博客详细计算了此案例的 KNN 算法过程(☞详情请戳☜),下面我们通过手写 Python 代码来实现 KNN 算法。
案例:
1.导入包
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
2.了解数据点分布
# Visualise the training samples of both classes together with the
# still-unlabelled test point.

# Training samples, one (x, y) coordinate pair of arrays per class.
x1, y1 = np.array([20000, 25000, 40000]), np.array([20, 30, 45])
x2, y2 = np.array([60000, 50000, 40000]), np.array([40, 25, 50])
# The single test point whose class is to be determined.
x_test, y_test = np.array([30000]), np.array([20])

# One scatter series per group: red and blue dots for the two classes,
# a star marker for the test point.
s1 = plt.scatter(x1, y1, c='r')
s2 = plt.scatter(x2, y2, c='b')
s3 = plt.scatter(x_test, y_test, marker='*')

# Attach a legend entry to each series and render the figure.
plt.legend((s1, s2, s3), ('B', 'G', 'unknow'))
plt.show()
3.KNN算法手写
# Hand-written KNN: classify one query point by majority vote among its
# k nearest training samples, using Euclidean distance on min-max scaled
# features.

# Training data: one row per sample, one column per feature.
x_data = np.array([
    [20000, 20],
    [60000, 40],
    [25000, 30],
    [50000, 25],
    [40000, 50],
    [40000, 45],
])
# Class label of each training row, in the same order as x_data.
target = np.array(['B', 'G', 'B', 'G', 'G', 'B'])
# Query point to classify.
x_test = np.array([30000, 20])

# (1) Min-max scaling, so that the large-valued first column does not
# dominate the distance. The per-column min/max come from the training data
# and are reused for the query point.
x_max = np.max(x_data, axis=0)
x_min = np.min(x_data, axis=0)
# A constant column has zero range; divide by 1 there so it scales to 0
# instead of producing nan/inf.
x_range = np.where(x_max > x_min, x_max - x_min, 1)
# Broadcasting applies the per-column vectors to every row (no np.tile needed).
x_scale = (x_data - x_min) / x_range
x_stest = (x_test - x_min) / x_range

# (2) Euclidean distance from the query point to every training sample.
diff = (x_scale - x_stest) ** 2      # squared per-feature differences
eu_dis = diff.sum(axis=1) ** 0.5     # sum over features, then square root
eu_dis  # notebook echo; no effect when run as a script

# (3) Pick the k nearest neighbours and count their labels.
sorted_dis = eu_dis.argsort()        # training-row indices, nearest first
sorted_dis  # notebook echo
k = 3
classcount = {}
# Slicing (rather than indexing range(k)) stays valid even if k exceeds the
# number of training samples.
for idx in sorted_dis[:k]:
    votetarget = target[idx]         # label of the next-nearest neighbour
    print(votetarget)
    classcount[votetarget] = classcount.get(votetarget, 0) + 1
classcount  # notebook echo

# Majority vote. The loop variables are deliberately NOT named k/v so the
# hyper-parameter k above is not overwritten. The strict '>' keeps the label
# that was counted first when two labels tie.
max_k = ''
max_v = 0
for label, count in classcount.items():
    if count > max_v:
        max_v = count
        max_k = label
print("测试集标签为:",max_k)
sklearn 实现iris分类
1.导入算法包
#KNN算法包
from sklearn import neighbors
#导入数据集所在包
from sklearn import datasets
#导入数据分割工具
from sklearn.model_selection import train_test_split
#导入数据分析报告
from sklearn.metrics import classification_report,confusion_matrix
# 导入模型选择包
from sklearn import model_selection
- 载入数据
# Load the iris dataset through sklearn's `datasets` module.
iris = datasets.load_iris()
# Bare expressions: in a notebook cell these echo the values for inspection
# (`data` and `target` are later passed to train_test_split as features and
# labels); they have no effect when the code is run as a plain script.
iris.data
iris.target
- 打乱数据并切分
# Shuffle the samples and hold out 20% of them as the test set.
# random_state is fixed so that the split, and therefore every metric
# reported afterwards, is reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42
)
- 建立模型
# Create a k-nearest-neighbours classifier configured with n_neighbors=3.
knn_model = neighbors.KNeighborsClassifier(n_neighbors=3)
# Fit the classifier on the training split.
knn_model.fit(x_train,y_train)
- 模型预测
# Predict labels for the held-out test features with the fitted model.
pred_knn = knn_model.predict(x_test)
- 生成分类报告
# Compare the true test labels with the predictions: print sklearn's
# classification report first, then the confusion matrix.
print(classification_report(y_test,pred_knn))
print(confusion_matrix(y_test,pred_knn))