# Python: hand-written KNN classifier (NumPy only)
import numpy as np
import matplotlib.pylab as plt
from math import sqrt
from sklearn import datasets
from collections import Counter
class Knn():
    """A simple k-nearest-neighbours (KNN) classifier written in plain Python/NumPy.

    Typical use: construct with a dataset, call ``splitdata`` to create the
    normalised train/test partitions, then call ``score_y`` (accuracy on the
    test partition) or ``knn_1`` (prediction for a single sample).
    """

    def __init__(self, data):
        """Store the dataset.

        Args:
            data: Mapping with a ``"data"`` entry (2-D feature array, one row
                per sample) and a ``"target"`` entry (1-D array of class
                labels). The script in this file passes the Iris dataset.
        """
        self.data = data

    def splitdata(self, test_ration):
        """Min-max normalise the features and split them into train/test sets.

        The result is stored in ``self.X_train``, ``self.Y_train``,
        ``self.X_test`` and ``self.Y_test``. The split is random (it uses
        NumPy's global random state).

        Args:
            test_ration: Fraction of the samples to hold out for testing,
                between 0 and 1. The test set size is rounded down.

        Raises:
            ValueError: If ``test_ration`` is outside ``[0, 1]``.
        """
        if not 0 <= test_ration <= 1:
            raise ValueError("test_ration must be between 0 and 1")

        # Work on a float copy: normalising in place would overwrite the
        # caller's dataset and would truncate the result for integer arrays.
        X = np.array(self.data["data"], dtype=float)  # feature matrix
        Y = np.asarray(self.data["target"])  # class labels

        # Min-max normalisation of every feature column to [0, 1].
        col_min = X.min(axis=0)
        col_span = X.max(axis=0) - col_min
        # A constant column has zero span; dividing by 1 maps it to 0
        # instead of producing NaN.
        col_span[col_span == 0] = 1.0
        X = (X - col_min) / col_span

        # Split into training and test sets.
        shuffle_index = np.random.permutation(len(X))
        test_size = int(len(X) * test_ration)  # number of test samples
        # Slice from an explicit position: ``[-test_size:]`` would return the
        # whole permutation when test_size is 0.
        split_at = len(X) - test_size
        train_index = shuffle_index[:split_at]
        test_index = shuffle_index[split_at:]
        self.X_train = X[train_index]
        self.Y_train = Y[train_index]
        self.X_test = X[test_index]
        self.Y_test = Y[test_index]

    def knn_1(self, xtar, k):
        """Predict the class of one sample by majority vote of its neighbours.

        Args:
            xtar: Feature vector of the sample to classify (same number of
                features as the training rows).
            k: Number of nearest training samples that vote; must be >= 1.

        Returns:
            The most common label among the ``k`` training samples closest to
            ``xtar`` in Euclidean distance.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError("k must be at least 1")

        diffs = np.asarray(self.X_train, dtype=float) - np.asarray(xtar, dtype=float)
        # Euclidean distance: square each difference BEFORE summing, so that
        # positive and negative differences cannot cancel each other out.
        distances = np.sqrt(np.sum(diffs ** 2, axis=1))
        min_index = np.argsort(distances)  # training indices, nearest first
        topK_y = [self.Y_train[i] for i in min_index[:k]]  # labels of the k nearest
        votes = Counter(topK_y)
        return votes.most_common(1)[0][0]

    def score_y(self, k):
        """Compute the classification accuracy on the test set.

        Args:
            k: Number of neighbours passed to ``knn_1`` for every test sample.

        Returns:
            Fraction of test samples whose predicted label equals the true
            label.

        Raises:
            ValueError: If the test set is empty (accuracy is undefined).
        """
        if len(self.Y_test) == 0:
            raise ValueError("test set is empty; use a larger test_ration")
        y_predict = np.array([self.knn_1(row, k) for row in self.X_test])
        return np.mean(y_predict == np.asarray(self.Y_test))
# Demo: load the Iris dataset, hold out 20% of the samples for testing and
# print the accuracy of the hand-written classifier with 6 neighbours.
iris = datasets.load_iris()
knn = Knn(iris)
knn.splitdata(test_ration=0.2)
print(knn.score_y(k=6))
# Sklearn: the same KNN classifier built on scikit-learn
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
class Knn():
    """KNN classification of a feature/label dataset using scikit-learn."""

    def __init__(self, data):
        """Keep a reference to the dataset.

        Args:
            data: Mapping indexed with ``"data"`` (features) and ``"target"``
                (labels); the script in this file passes the Iris dataset.
        """
        self.data = data

    def splitdata(self, test_ration):
        """Split the dataset into train/test parts and scale the features.

        The results are stored in ``self.X_train``, ``self.X_test``,
        ``self.Y_train`` and ``self.Y_test``.

        Args:
            test_ration: Forwarded to ``train_test_split`` as ``test_size``.
        """
        features = self.data["data"]
        labels = self.data["target"]

        # Split into training and test sets; the seed is fixed at 666.
        (self.X_train, self.X_test,
         self.Y_train, self.Y_test) = train_test_split(
            features, labels, test_size=test_ration, random_state=666)

        # Scale features with a StandardScaler fitted on the training part
        # only, then apply that same scaler to both parts.
        scaler = StandardScaler().fit(self.X_train)
        self.X_train = scaler.transform(self.X_train)
        self.X_test = scaler.transform(self.X_test)

    def knn_1(self, k):
        """Fit a k-neighbours classifier and evaluate it on the test set.

        Prints the true test labels followed by the predicted labels.

        Args:
            k: Number of neighbours given to ``KNeighborsClassifier``.

        Returns:
            The value of the classifier's ``score`` on the test set.
        """
        classifier = KNeighborsClassifier(n_neighbors=k)
        classifier.fit(self.X_train, self.Y_train)
        predicted = classifier.predict(self.X_test)
        for shown in (self.Y_test, predicted):
            print(shown)
        return classifier.score(self.X_test, self.Y_test)
# Demo: load the Iris dataset, hold out 20% of the samples for testing and
# print the score of the scikit-learn classifier with 5 neighbours.
data = datasets.load_iris()
knn = Knn(data)
knn.splitdata(test_ration=0.2)
print(knn.knn_1(k=5))