有一天我结婚了,你一定要来哦,因为没有新娘,那该有多尴尬.
这是我自己写的第一个比较简单的 KNN 练习案例,有关 KNN 的介绍请参考我的上一篇博文。
This is my first simple exercise case of KNN written by myself. For an introduction to KNN, please refer to my last blog post.
#导包
import numpy as np
from matplotlib import pyplot as plt
import operator
%matplotlib inline
# Hand-made training set: 15 two-dimensional points, five per class.
data = [
    # class 'a'
    [0.8, 1.8], [0.9, 2.1], [1.0, 1.5], [1.2, 1.9], [1.3, 2.0],
    # class 'b'
    [2.5, 1.7], [2.8, 1.5], [2.5, 1.4], [2.7, 1.9], [2.6, 1.8],
    # class 'c'
    [1.9, 3.3], [2.0, 2.9], [2.2, 2.8], [2.1, 2.9], [1.8, 3.0],
]
# label[i] is the class of data[i].
label = ['a'] * 5 + ['b'] * 5 + ['c'] * 5
print(label)
# Array form of the same points, used by the classifier further down.
train_data = np.array(data)
print(train_data)
# Coordinates of every sample, in data order.
x = [point[0] for point in data]
y = [point[1] for point in data]
# Rows 0-4, 5-9 and 10-14 of `data` are split into three per-group
# coordinate lists.
x_1, x_2, x_3 = x[:5], x[5:10], x[10:15]
y_1, y_2, y_3 = y[:5], y[5:10], y[10:15]
# Quick overview plot of all samples.
plt.scatter(x, y)
print(f'{x_1} \n {y_1}\n{x_2} \n {y_2}\n{x_3}\n {y_3} ')
# Draw the samples on a fresh 10x10-inch figure, one colour per group.
f, ax = plt.subplots(1, 1, figsize=(10, 10))
groups = (
    (x_1, y_1, 'r'),
    (x_2, y_2, 'b'),
    (x_3, y_3, 'g'),
)
for i in range(5):
    for xs, ys, colour in groups:
        ax.scatter(xs[i], ys[i], label='skitcat', color=colour, marker='o')
# Custom query point; the author notes that, judging from the plot, it
# clearly belongs to the third class.
x_test = 1.5
y_test = 2.85
test = [[x_test, y_test]]
ax.scatter(test[0][0], test[0][1], label='skitcat', color='m', marker='x')
test = np.array(test)
从图中可以看出,测试点 (1.5, 2.85) 很明显属于第三类(c 类)。
# Distance metric
def d_euc(x, y):
    """Return the Euclidean distance between ``x`` and ``y``.

    Args:
        x: Point given as a NumPy array or a plain sequence of numbers.
        y: Point in the same form; it must be broadcastable against ``x``.

    Returns:
        The square root of the sum of all squared element-wise differences,
        as a NumPy scalar.
    """
    # asarray lets plain lists/tuples through; subtracting two Python lists
    # directly would raise TypeError.
    diff = np.asarray(x) - np.asarray(y)
    return np.sqrt(np.sum(np.square(diff)))
def KNN(train_data, test, label, k):
    """Rank the classes of the ``k`` training samples nearest to ``test``.

    Args:
        train_data: 2-D array-like with one training sample per row.
        test: The query point, either 1-D ``(n_features,)`` or a single row
            ``(1, n_features)``.
        label: Sequence of class labels; ``label[i]`` is the class of
            ``train_data[i]``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        Whatever ``majory_vote`` returns for the vote tally of the ``k``
        nearest neighbours.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    train_data = np.asarray(train_data)
    test = np.asarray(test)

    # Distance from the query point to every training sample.  The distance
    # must be taken against `test`; measuring against `train_data` makes the
    # result independent of the query.
    distance = np.array([d_euc(sample, test) for sample in train_data])

    # Sample indices ordered from nearest to farthest; the stable sort keeps
    # equal distances in training order so the result is reproducible.
    index = distance.argsort(kind="stable")

    # Tally the labels of exactly the k nearest samples.
    count = {}
    for i in index[:k]:
        label_vote = label[i]
        count[label_vote] = count.get(label_vote, 0) + 1

    return majory_vote(count)
# Decision rule: majority voting
def majory_vote(count):
    """Rank a vote tally from the most-voted label to the least-voted one.

    Args:
        count: Mapping of class label -> number of votes.

    Returns:
        A list of ``(label, votes)`` tuples in descending vote order; labels
        with equal votes keep the mapping's iteration order.
    """
    ranking = list(count.items())
    # list.sort is stable, also with reverse=True, so ties stay in place.
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking
# Classify the query point that was marked with an 'x' on the plot.
label = ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'c']
# Use the plotted point (1.5, 2.85); the surrounding text discusses this
# point, not (3.0, 2.0).
test = np.array([1.5, 2.85])
final_label = KNN(train_data, test, label, 6)
# Bare expression so that a notebook cell displays the result.
final_label
最后结果
可以发现,结果与我们最初从图中看到的真实情况一样,属于 c 类。
欢迎访问我的博客Sky’s blog