# kNN — a minimal single-point (1-NN) implementation.
# kNN is a basic classification algorithm. Unlike neural-network methods, it
# classifies a sample simply by comparing the geometric (Euclidean) distance
# between feature vectors.
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import random
# Load the iris data set and split each 4-feature row into per-feature lists.
iris_datasets = load_iris()
trainData = iris_datasets['data'][:]
# One flat list per feature column.
data0 = [row[0] for row in trainData]
data1 = [row[1] for row in trainData]
data2 = [row[2] for row in trainData]
data3 = [row[3] for row in trainData]
# Index lists for the three iris classes (50 consecutive samples per class).
kind0 = list(range(50))
kind1 = list(range(50, 100))
kind2 = list(range(100, 150))
########################
# Figure 1: features 0/1/2 as a 3-D scatter plot, one colour per class.
ax = plt.subplot(111, projection='3d')
plt.title("First")
for start, colour in ((0, 'r'), (50, 'g'), (100, 'b')):
    ax.scatter(data0[start:start + 50],
               data1[start:start + 50],
               data2[start:start + 50],
               c=colour)
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")
plt.show()
#plt.close()
########################
# Figure 2: features 3/1/2 as a 3-D scatter plot, one colour per class.
# Fix: create the 3-D subplot BEFORE setting the title — the original called
# plt.title("Second") first, attaching the title to whatever axes happened to
# be current rather than to this plot (figure 1 had the correct order).
bx = plt.subplot(111, projection='3d')
plt.title("Second")
bx.scatter(data3[:50], data1[:50], data2[:50], c='r')
bx.scatter(data3[50:100], data1[50:100], data2[50:100], c='g')
bx.scatter(data3[100:150], data1[100:150], data2[100:150], c='b')
bx.set_xlabel("X")
bx.set_ylabel("Y")
bx.set_zlabel("Z")
plt.show()
#plt.close()
###########################
# Figure 3: features 3/0/2 as a 3-D scatter plot, one colour per class.
# Fix: create the 3-D subplot BEFORE setting the title — the original called
# plt.title("Third") first, attaching the title to whatever axes happened to
# be current rather than to this plot (figure 1 had the correct order).
cx = plt.subplot(111, projection='3d')
plt.title("Third")
cx.scatter(data3[:50], data0[:50], data2[:50], c='r')
cx.scatter(data3[50:100], data0[50:100], data2[50:100], c='g')
cx.scatter(data3[100:150], data0[100:150], data2[100:150], c='b')
cx.set_xlabel("X")
cx.set_ylabel("Y")
cx.set_zlabel("Z")
plt.show()
plt.close()
#########################
# Pick 5 random samples as test points and (attempt to) remove them from the
# per-feature training lists.  NOTE(review): this block looks buggy — see the
# inline notes; the logic is kept unchanged here.
testData = []   # 4-feature vectors held out for testing
realValue = []  # ground-truth class (0/1/2) for each test vector
for _ in range(5):
    a = random.randint(0, 149)   # random sample index (duplicates possible)
    realValue.append(a // 50)    # classes are stored in blocks of 50
    flag = 0
    # NOTE(review): `a` is an int index while trainData holds float feature
    # rows; `a in trainData` tests element-wise equality against the feature
    # values, which is almost certainly not the intended "index exists" check.
    if a in trainData:
        flag = 1
    # NOTE(review): deleting by raw index after earlier deletions shifts the
    # remaining indices, so these dels can remove the wrong element or raise
    # IndexError on later iterations.
    if a in kind0 and flag == 1:
        del kind0[a]
    elif a in kind1 and flag == 1:
        del kind1[a - 50]
    else:
        if flag == 1:
            del kind2[a - 100]
    testData.append([data0[a], data1[a], data2[a], data3[a]])
    # Remove the chosen sample from the training lists so it is not matched
    # against itself.  NOTE(review): guarded by flag != 1, i.e. the removal
    # only happens when the (dubious) membership test above failed — confirm
    # this is the intended condition.
    if flag != 1:
        del data0[a]
        del data1[a]
        del data2[a]
        del data3[a]
# print(testData,'/',trainData)
def distence(array1=(0, 0, 0, 0), array2=(0, 0, 0, 0)):
    """Return the squared Euclidean distance between two equal-length vectors.

    The (misspelled) name is kept for existing callers.  On a length
    mismatch it prints "error" and returns None, matching the original
    behaviour.  Defaults are now tuples — the original used mutable list
    defaults, a classic Python pitfall.
    """
    if len(array1) != len(array2):
        print("error")
        return None
    # sum-over-zip replaces the manual index loop and __len__() calls.
    return sum((p - q) ** 2 for p, q in zip(array1, array2))
def predict(array1=(0, 0, 0, 0)):
    """1-NN prediction: return the class (0/1/2) of the nearest training point.

    array1: a 4-feature sample.  Prints "error" and returns -1 when the
    input does not have exactly 4 features (original behaviour kept).

    Relies on the module-level training lists data0..data3; samples are
    stored in blocks of 50 per class, so nearest-index // 50 is the class.
    """
    if len(array1) != 4:
        print("error")
        return -1
    best = distence(array1, [data0[0], data1[0], data2[0], data3[0]])
    best_idx = 0
    # Fixes: the distance is computed once per iteration (the original called
    # distence twice), and len(data0) replaces the hard-coded 145 so the loop
    # adapts to however many samples were actually removed as test data.
    for idx in range(1, len(data0)):
        d = distence(array1, [data0[idx], data1[idx], data2[idx], data3[idx]])
        if d < best:
            best = d
            best_idx = idx
    return best_idx // 50
# Evaluate the classifier on the held-out test points.
per = 0  # number of correct predictions
for sample, real in zip(testData, realValue):
    # Fix: call predict once per sample — the original called it twice,
    # doing the full nearest-neighbour scan a second time for no benefit.
    pred = predict(sample)
    print("预测值", pred, "真实值", real)
    if pred == real:
        per = per + 1
# Print the success rate once, after the loop (the original checked for the
# last index inside the loop body to the same effect).
print("识别成功率:", per * 100 / len(testData), "%\n")
# This is just a simple single-point (1-NN) implementation; the k-point (kNN)
# version follows the same idea, summing the distances over the set of nearest
# points and comparing the totals to pick the extreme.
# matplotlib is used to visualise the class structure; with four features a
# 4-D plot cannot be drawn, so the data can instead be shown as four
# three-dimensional projections.