import math
from collections import Counter

import numpy as np
from sklearn.datasets import load_iris  # load the iris dataset
# Load the iris dataset; features are in iris.data, labels in iris.target.
iris = load_iris()
# Train on the first 149 samples and hold out the last one for prediction.
X = iris.data[0:149]
Y = iris.target[:149]
x = iris.data[149]
# Euclidean distance from the held-out sample to every training sample.
d = [math.sqrt(np.sum((x - row) ** 2)) for row in X]
near = np.argsort(d)  # training indices ordered from nearest to farthest
k = 6  # number of neighbours (the K in KNN)
# Use k here instead of a repeated literal so changing k actually takes effect.
top_k = [Y[i] for i in near[:k]]
votes = Counter(top_k)
pre_y = votes.most_common(1)[0][0]  # the majority label is the prediction
# Same prediction using scikit-learn's built-in KNN, with the same k.
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X, Y)
knn.predict([x])
# -*- coding: utf-8 -*-
"""
Created on Thu May 16 21:17:56 2019
@author: Lenovo
"""
# Wrap KNN in a class that mimics scikit-learn's estimator interface
from sklearn.datasets import load_iris #导入数据集iris
import math
from collections import Counter
import numpy as np
class K_NN():
    """Minimal k-nearest-neighbours classifier with a scikit-learn style API.

    Prediction is a majority vote among the ``K`` training samples that are
    closest (Euclidean distance) to the query sample.
    """

    def __init__(self, K):
        """Store the number of neighbours ``K`` used for voting.

        Raises:
            ValueError: if ``K`` is smaller than 1.
        """
        if K < 1:
            raise ValueError("K must be at least 1")
        self.K = K
        self._x_train = None
        self._y_train = None

    def fit(self, X_train, Y_train):
        """Memorise the training samples and their labels; return ``self``.

        Raises:
            ValueError: if the training set is empty or the number of
                samples and labels differ.
        """
        x_arr = np.asarray(X_train)
        y_arr = np.asarray(Y_train)
        if len(x_arr) == 0:
            raise ValueError("training set must not be empty")
        if len(x_arr) != len(y_arr):
            raise ValueError("X_train and Y_train must have the same length")
        self._x_train = x_arr
        self._y_train = y_arr
        return self

    def predict(self, X_pre):
        """Return a numpy array with one predicted label per sample in ``X_pre``.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self._x_train is None or self._y_train is None:
            raise RuntimeError("fit must be called before predict")
        return np.array([self._predict(sample) for sample in X_pre])

    def _predict(self, x):
        """Predict the label of a single sample ``x`` by majority vote."""
        # Distances to all training samples at once; flattening each row lets
        # this work for any per-sample shape.
        diff = self._x_train - np.asarray(x)
        dist = np.sqrt(np.sum(diff.reshape(len(diff), -1) ** 2, axis=1))
        # Honour the configured K (previously the literal 6 was used here,
        # so the constructor argument had no effect).
        nearest = np.argsort(dist)[:self.K]
        votes = Counter(self._y_train[i] for i in nearest)
        # On a tie, most_common keeps the label that was seen first, i.e. the
        # one belonging to the closer neighbour.
        return votes.most_common(1)[0][0]
# Load the iris dataset (features in iris.data, labels in iris.target).
iris = load_iris()
# Train on the first 149 samples; keep the final sample aside for prediction.
X, Y = iris.data[:149], iris.target[:149]
x = iris.data[149]
# fit() returns the classifier itself, so construction and fitting can be chained.
knn = K_NN(6).fit(X, Y)
knn.predict([x])
# Use the hand-written (self-encapsulated) implementation
from sklearn.datasets import load_iris #导入数据集iris
import math
from collections import Counter
import numpy as np
# Load the iris dataset and copy the complete feature matrix and label
# vector into numpy arrays (no samples are held out at this point).
iris = load_iris()
X, Y = np.array(iris.data), np.array(iris.target)
#train_test_split
#sf_index=np.random.permutation(len(X))
#test_ratio=0.2
#test_size=int(len(X)*test_ratio)
#test_index=sf_index[:test_size]
#train_index=sf_index[test_size:]
#x_train=X[train_index]
#y_train=Y[train_index]
#
#x_test=X[test_index]
#y_test=Y[test_index]
class K_NN():
    """k-nearest-neighbours classifier exposing ``fit`` / ``predict``.

    A query sample is labelled by majority vote among the ``K`` training
    samples with the smallest Euclidean distance to it.
    """

    def __init__(self, K):
        """Remember how many neighbours (``K``) take part in the vote.

        Raises:
            ValueError: if ``K`` is smaller than 1.
        """
        if K < 1:
            raise ValueError("K must be at least 1")
        self.K = K
        self._x_train = None
        self._y_train = None

    def fit(self, X_train, Y_train):
        """Store the training data as numpy arrays and return ``self``.

        Raises:
            ValueError: if there are no training samples or the sample and
                label counts do not match.
        """
        samples = np.asarray(X_train)
        labels = np.asarray(Y_train)
        if len(samples) == 0:
            raise ValueError("training set must not be empty")
        if len(samples) != len(labels):
            raise ValueError("X_train and Y_train must have the same length")
        self._x_train = samples
        self._y_train = labels
        return self

    def predict(self, X_pre):
        """Predict a label for every sample in ``X_pre``; returns a numpy array.

        Raises:
            RuntimeError: if the classifier has not been fitted yet.
        """
        if self._x_train is None or self._y_train is None:
            raise RuntimeError("fit must be called before predict")
        return np.array([self._predict(sample) for sample in X_pre])

    def _predict(self, x):
        """Return the majority label among the ``self.K`` nearest neighbours of ``x``."""
        # One vectorised distance computation instead of a Python-level loop;
        # rows are flattened so any per-sample shape is accepted.
        delta = self._x_train - np.asarray(x)
        distances = np.sqrt(np.sum(delta.reshape(len(delta), -1) ** 2, axis=1))
        # Slice with self.K: the original sliced with a literal 6, which
        # silently ignored the K passed to the constructor.
        closest = np.argsort(distances)[:self.K]
        votes = Counter(self._y_train[i] for i in closest)
        # Ties resolve to the label encountered first (the closer neighbour).
        return votes.most_common(1)[0][0]
def train_test__split(X, Y, test_ratio=0.2, seed=None):
    """Randomly split samples ``X`` and labels ``Y`` into train and test parts.

    Args:
        X: samples, indexable along the first axis (converted with
            ``np.asarray``).
        Y: labels, one per sample.
        test_ratio: fraction of the samples placed in the test part; the
            test size is ``int(len(X) * test_ratio)``.
        seed: optional seed. When given (including ``0``) numpy's global
            random generator is re-seeded so the split is reproducible.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)``.

    Raises:
        ValueError: if ``X`` and ``Y`` differ in length or ``test_ratio`` is
            outside ``[0, 1]``.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    if len(X) != len(Y):
        raise ValueError("X and Y must have the same length")
    if not 0 <= test_ratio <= 1:
        raise ValueError("test_ratio must be between 0 and 1")
    # Compare against None so that seed=0 is honoured too.
    if seed is not None:
        np.random.seed(seed)
    sf_index = np.random.permutation(len(X))
    # Use the caller's test_ratio (it used to be overwritten with 0.2 here).
    test_size = int(len(X) * test_ratio)
    test_index = sf_index[:test_size]
    train_index = sf_index[test_size:]
    return X[train_index], X[test_index], Y[train_index], Y[test_index]
# Split the data, fit the hand-written classifier and compute the fraction
# of test samples whose predicted label matches the true label.
x_train, x_test, y_train, y_test = train_test__split(X, Y)
knn = K_NN(K=6)
knn.fit(x_train, y_train)
yy = knn.predict(x_test)
np.sum(yy == y_test) / len(yy)
# The ready-made "wheel": the same workflow using scikit-learn's built-ins
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Split with a fixed random_state, fit scikit-learn's KNN classifier and
# compute the fraction of correct predictions on the test part.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=123
)
knn_clf = KNeighborsClassifier(n_neighbors=6)
knn_clf.fit(x_train, y_train)
y_pre = knn_clf.predict(x_test)
np.sum(y_pre == y_test) / len(y_test)