#knn 模型:
import numpy as np
import matplotlib.pyplot as plt
# Training set, one record per person: (height in cm, weight in kg, sex).
_samples = (
    (158, 64, 'male'),
    (170, 86, 'male'),
    (183, 84, 'male'),
    (191, 80, 'male'),
    (155, 49, 'female'),
    (163, 59, 'female'),
    (180, 67, 'female'),
    (158, 54, 'female'),
    (170, 67, 'female'),
)
# Feature matrix: one [height, weight] row per person.
X_trian = np.array([[height, weight] for height, weight, _ in _samples])
# Class labels, index-aligned with the rows of X_trian.
y_train = [sex for _, _, sex in _samples]
# Scatter plot of the training set: height on the x axis, weight on the
# y axis, with the marker shape encoding the sex label.
plt.figure()  # start a new figure for this plot
plt.title("human heights and weights by sex ")
plt.xlabel("height in cm ")
plt.ylabel("weight in kg ")
# Each row of X_trian is [height, weight]; y_train holds the matching label.
for (height, weight), sex in zip(X_trian, y_train):
    # One scatter call per point because the marker is chosen per point:
    # 'x' for male, 'D' (diamond) for female. No cmap is passed: a colormap
    # only applies when c is numeric data to be mapped, and c is a fixed
    # colour here.
    plt.scatter(height, weight, s=40, c='b',
                marker='x' if sex == 'male' else 'D')
plt.grid(True)  # draw grid lines
plt.show()
# The plot above is only a visual check of the training data.
# Below: classify one query point by majority vote of its nearest neighbours.
from collections import Counter

# Number of neighbours that vote. An odd k cannot tie between two classes.
k = 3

# Query sample to classify: a single row of [height, weight].
x = np.array([
    [155, 70]
])

# Euclidean distance from the query to every training sample; broadcasting
# subtracts the single query row from each row of X_trian.
distances = np.sqrt(np.sum((X_trian - x) ** 2, axis=1))
print(distances)

# Indices of the k closest training samples, nearest first.
nearest_neighbor_indices = distances.argsort()[:k]
# np.take maps those indices to their labels in y_train.
nearest_neighbor_genders = np.take(y_train, nearest_neighbor_indices)
print("take后的数据", nearest_neighbor_genders)
# The neighbour indices themselves (informational only).
print(nearest_neighbor_indices)

# Count votes per label. tolist() converts NumPy string scalars to plain
# str so the printed Counter shows 'female' rather than np.str_('female').
b = Counter(nearest_neighbor_genders.tolist())
print("b=", b)

# most_common(n) returns up to n (label, count) tuples, highest count first;
# [0][0] of the top entry is the predicted label.
print(b.most_common(1))
print(b.most_common(1)[0][0])

top_two = b.most_common(2)
print(top_two)
# Labels first, then their counts. top_two has a single entry when all k
# neighbours share a label, so unpack instead of indexing [1] directly.
print(*[label for label, _ in top_two], *[votes for _, votes in top_two])
# OK, here is the output of a run:
# [ 6.70820393 21.9317122 31.30495168 37.36308338 21. 13.60147051
# 25.17935662 16.2788206 15.29705854]
# take后的数据 ['male' 'female' 'female']
# [0 5 8]
# b= Counter({'female': 2, 'male': 1})
# [('female', 2)]
# female
# [('female', 2), ('male', 1)]
# female male 2 1
# Process finished with exit code 0