# KNN 算法简单实现札记
# Load the iris dataset
from sklearn.datasets import load_iris
data = load_iris()
print(data)
# Convert to a DataFrame to get a tabular look at the data
import pandas as pd
df = pd.DataFrame(data.data,columns=data.feature_names)
# data.target holds integer class codes; indexing target_names with it
# yields the class name for every row.
df ['target'] = data.target_names[data.target]
# Bare expression: the preview is only displayed in a notebook/REPL; when run
# as a plain script the returned value is discarded.
df.head()
# Plot the data for a more intuitive feel: feature columns 2 and 3 on the
# x/y axes, points coloured by class code.
# NOTE(review): presumably petal length and petal width — confirm against
# data.feature_names.
import matplotlib.pyplot as plt
plt.scatter(data.data[:,2],data.data[:,3],c=data.target)
plt.show()
# 写个距离函数(曼哈顿距离,即 L1 距离)
import numpy as np
def distance(a, b):
    """Return the Manhattan (L1) distance from each row of ``a`` to ``b``.

    ``b`` is subtracted from ``a`` (NumPy broadcasting applies), the
    element-wise absolute differences are taken, and they are summed along
    axis 1. ``a`` therefore has to be at least two-dimensional.

    Args:
        a: 2-D array with one sample per row.
        b: Array broadcastable against ``a``, e.g. a single sample.

    Returns:
        Array holding one distance per row of ``a``.
    """
    deltas = a - b
    return np.absolute(deltas).sum(axis=1)
# 写个 knn 模型
class KNN:
    """Minimal brute-force k-nearest-neighbours classifier.

    Training only memorises the data. At prediction time every query sample
    is compared with all training samples using the module-level
    ``distance`` function, and the most common label among the ``k`` closest
    training samples is returned.
    """

    def __init__(self, k):
        """Store the number of neighbours that vote on each prediction.

        Args:
            k: Number of nearest neighbours to consult; an integer >= 1.

        Raises:
            TypeError: If ``k`` is not an integer.
            ValueError: If ``k`` is smaller than 1.
        """
        if not isinstance(k, (int, np.integer)):
            raise TypeError(
                "k must be an integer, got {}".format(type(k).__name__))
        # k == 0 leaves nothing to vote on, and a negative k would silently
        # turn the slice below into "all but the last |k| neighbours".
        if k < 1:
            raise ValueError("k must be at least 1, got {}".format(k))
        self.k = k

    def fit(self, x, y):
        """Memorise the training set.

        Args:
            x: Array-like of training samples, one sample per row.
            y: Array-like of class labels, one label per row of ``x``.
        """
        # np.asarray returns ndarrays unchanged (no copy) and converts plain
        # lists, so predict() can rely on .shape / .dtype being available.
        self.x_train = np.asarray(x)
        self.y_train = np.asarray(y)

    def predict(self, x):
        """Predict a label for every row of ``x`` by majority vote.

        Args:
            x: Array-like of query samples, one sample per row.

        Returns:
            Array of shape ``(n_samples, 1)`` with the dtype of the training
            labels. Because it is a column vector, flatten it (for example
            with ``.ravel()``) before comparing it element-wise with a 1-D
            label array; otherwise the comparison broadcasts to a matrix.
        """
        x = np.asarray(x)
        y_pre = np.zeros((x.shape[0], 1), dtype=self.y_train.dtype)
        for i, x_test in enumerate(x):
            dis = distance(self.x_train, x_test)
            nearest = np.argsort(dis)[:self.k]
            # np.unique works for any sortable label type (not only the
            # non-negative integers np.bincount accepts). It returns the
            # labels in ascending order and argmax picks the first maximum,
            # so a tied vote goes to the smallest label.
            labels, counts = np.unique(self.y_train[nearest],
                                       return_counts=True)
            y_pre[i] = labels[np.argmax(counts)]
        return y_pre
# Split the data into a training set and a test set
X = data.data
y = data.target
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for testing. stratify=y keeps the class
# proportions the same in both parts, and the fixed random_state makes the
# split reproducible from run to run.
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=10,test_size=0.3,stratify=y)
# Print the shapes of the resulting arrays as a sanity check
print(X_train.shape,X_test.shape)
print(y_train.shape,y_test.shape)
# Train the model and predict labels for the held-out test set.
knn = KNN(k=3)
knn.fit(X_train, y_train)
# predict() returns a column vector with one row per test sample (shape
# (len(X_test), 1)); flatten it before comparing element-wise with y_test.
y_pre = knn.predict(X_test)