K最近邻算法
正如名字本身所说--近朱者赤近墨者黑
1.需要逐步掌握:
- K最近邻算法处理二元分类
- K最近邻算法处理多元分类
- K最近邻算法的回归分析
- K最近邻算法的实例
2.所用模块:
#导入数据集生成器
from sklearn.datasets import make_blobs
#导入knn分类器
from sklearn.neighbors import KNeighborsClassifier
#导入画图工具
import matplotlib.pyplot as plt
#导入数据集拆分工具
from sklearn.model_selection import train_test_split
#生成样本数为200、分类数为2的数据集
import numpy as np
from sklearn.datasets import make_regression
3.代码讲解:
# Experiment 1: generate and visualize a two-class toy dataset.
# dataset generator
from sklearn.datasets import make_blobs
# KNN classifier
from sklearn.neighbors import KNeighborsClassifier
# plotting toolkit
import matplotlib.pyplot as plt
# train/test split utility
from sklearn.model_selection import train_test_split

# 200 samples drawn around 2 centers; random_state fixed for reproducibility.
X, y = make_blobs(n_samples=200, centers=2, random_state=8)
# Scatter plot of the two features, colored by class label.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.spring, edgecolor='k')
plt.show()
通过make_blobs生成随机数据集
用plt.scatter绘制散点图
其中X、y分别表示特征数组和标签数组。
# Experiment 2: fit a KNN classifier on the 2-class data (X, y from
# experiment 1) and draw its decision boundary.
import numpy as np
clf = KNeighborsClassifier()
clf.fit(X, y)

# Build a dense grid spanning the feature space (padded by 1 on each side),
# classify every grid point, and reshape predictions back to the grid.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, .02),
                     np.arange(y_min, y_max, .02))
z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
z = z.reshape(xx.shape)

plt.pcolormesh(xx, yy, z, cmap=plt.cm.Pastel1)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.spring, edgecolor='k')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title("Classifier:KNN")
# Bug fix: the starred point and the predicted point used to differ —
# (6.75, 4.82) was plotted but [[6.75, 6]] was predicted. Use one shared
# point so the figure and the printed prediction describe the same sample.
new_point = [6.75, 4.82]
plt.scatter(new_point[0], new_point[1], marker='*', c='red', s=200)
plt.show()
print('新数据点的分类是:', clf.predict([new_point]))
# Experiment 3: the same workflow with 5 classes (multi-class KNN).
X2, y2 = make_blobs(n_samples=500, centers=5, random_state=8)
plt.scatter(X2[:, 0], X2[:, 1], c=y2, cmap=plt.cm.spring, edgecolor='k')
plt.show()

clf = KNeighborsClassifier()
clf.fit(X2, y2)

# Decision-surface plot: evaluate the classifier on a dense grid covering
# the (padded) range of both features, then color each cell by its class.
x_min, x_max = X2[:, 0].min() - 1, X2[:, 0].max() + 1
y_min, y_max = X2[:, 1].min() - 1, X2[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, .02),
                     np.arange(y_min, y_max, .02))
z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

plt.pcolormesh(xx, yy, z, cmap=plt.cm.Pastel1)
plt.scatter(X2[:, 0], X2[:, 1], c=y2, cmap=plt.cm.spring, edgecolor='k')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title("Classifier:KNN")
plt.show()
# NOTE: this is accuracy on the training data itself, not on a held-out set.
print('模型正确率:{:.2f}'.format(clf.score(X2, y2)))
实验2为二元分类,实验3为多元分类
# Experiment 4: KNN regression on a 1-feature synthetic dataset.
from sklearn.datasets import make_regression
X, y = make_regression(n_features=1, n_informative=1, noise=50, random_state=8)
# Visualize the raw samples first.
plt.scatter(X, y, c='orange', edgecolor='k')
plt.show()

from sklearn.neighbors import KNeighborsRegressor

# Evenly spaced query points over [-3, 3], shaped as a single-feature column.
z = np.linspace(-3, 3, 1000).reshape(-1, 1)


def _show_fit(model):
    # Fit `model` on (X, y), overlay its prediction curve on the scatter
    # plot, and print the R^2 score on the training data.
    model.fit(X, y)
    plt.scatter(X, y, c='orange', edgecolor='k')
    plt.plot(z, model.predict(z), c='k', linewidth=3)
    plt.title('knn regressor')
    plt.show()
    print('模型正确率:{:.2f}'.format(model.score(X, y)))


# Default number of neighbors (5) versus a smaller neighborhood of 2.
reg = KNeighborsRegressor()
_show_fit(reg)
reg2 = KNeighborsRegressor(n_neighbors=2)
_show_fit(reg2)
实验4为回归分析
4.K最近邻算法实例:
# NOTE: the triple-quoted block below is a deliberately disabled example
# that classifies sklearn's built-in wine dataset with 1-NN; it is kept as
# reference text and is not executed.
'''
from sklearn.datasets import load_wine
wine = load_wine()
print('\n')
print(wine.keys())
print('\n')
print(wine['data'].shape)
#train_test_split拆分工具
from sklearn.model_selection import train_test_split
X_train ,X_test , y_train ,y_test = train_test_split(wine['data'],wine['target'],random_state=0)
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors = 1)
knn.fit(wine['data'],wine['target'])
import numpy as np
X_new = np.array([[13.2,2.77,2.51,18.5,96.6,1.04,2.55,0.57,1.47,6.2,1.05,3.33,820]])
predict = knn.predict(X_new)
print('预测新红酒的分类为:{}'.format(wine['target_names'][predict]))
'''
## Self test: 1-NN on a tiny hand-made snack dataset.
# Fix: the unmaintained third-party `bunch` package was pulled in with a
# wildcard import; sklearn (used throughout this file) provides the same
# dict-with-attribute-access container. The unused load_wine import is gone.
from sklearn.utils import Bunch
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

book = Bunch()
# One row per food item: [energy, protein, fat, carbohydrate, sodium].
book.data = [[16, 50, 2, 15, 80], [25, 11, 26, 24, 11], [10, 1, 0, 16, 60],
             [16, 19, 16, 16, 146], [11, 7, 4, 15, 6]]
book.target = [0, 1, 2, 3, 4]
book.target_name = ['肉类零食', '乳制品', '水果干', '辣条', '面包']
# Typo fix: the element sodium is 钠, not 纳.
book.data_name = ['能量', '蛋白质', '脂肪', '碳水化合物', '钠']

knn = KNeighborsClassifier(n_neighbors=1)
# With only 5 samples the default 25% split leaves very little data, so the
# score below is indicative at best (the surrounding notes say the same).
X_train, X_test, y_train, y_test = train_test_split(book['data'], book['target'], random_state=0)
knn.fit(X_train, y_train)
print('测试得分:{:.2f}\n'.format(knn.score(X_test, y_test)))

# Refit on the full dataset before predicting a brand-new sample.
knn.fit(book['data'], book['target'])
newbook = np.array([[80, 23, 80, 10, 10]])
predict = knn.predict(newbook)
print('概率为:{}'.format(knn.predict_proba(newbook)))
print('测试结果:\n{}'.format(np.array(book['target_name'])[predict][0]))
注释部分为sklearn模块自带的红酒数据,通过wine.keys()可查看结构
后面为自己组建的数据测试,由于数据过少只作为参考,数据格式使用Bunch对象。