最近邻分类:在距离空间里,如果一个样本的最接近的k个邻居里,绝大多数属于某个类别,则该样本也属于这个类别。
KNN分类
案例分析:电影分类
from sklearn import neighbors # 导入KNN分类模块
data = pd.DataFrame({'name':['北京遇上西雅图','喜欢你','疯狂动物城','战狼2','力王','敢死队'],
'fight':[3,2,1,101,99,98],
'kiss':[104,100,81,10,5,2],
'type':['Romance','Romance','Romance','Action','Action','Action']})
print(data)
print('-------')
# 加载数据,构建KNN分类模型
# 预测未知数据
knn = neighbors.KNeighborsClassifier() # 取得knn分类器
knn.fit(data[['fight','kiss']], data['type'])
# print('预测电影类型为:', knn.predict([18, 90]))
# 绘制图表
plt.scatter(data[data['type'] == 'Romance']['fight'],data[data['type'] == 'Romance']['kiss'],color = 'r',marker = 'o',label = 'Romance')
plt.scatter(data[data['type'] == 'Action']['fight'],data[data['type'] == 'Action']['kiss'],color = 'g',marker = 'o',label = 'Action')
plt.grid()
plt.legend()
plt.scatter(18,90,color = 'r',marker = 'x',label = 'Romance')
plt.ylabel('kiss')
plt.xlabel('fight')
plt.text(18,90,'《你的名字》',color = 'r')
# 给随机数据进行模拟预测
# 创建数据,并调用模型预测
data2 = pd.DataFrame(np.random.randn(100,2)*50,columns = ['fight','kiss'])
data2['type'] = knn.predict(data2)
print(data2.head())
print('------')
# 绘制图表
plt.scatter(data[data['type'] == 'Romance']['fight'],data[data['type'] == 'Romance']['kiss'],color = 'r',marker = 'o',label = 'Romance')
plt.scatter(data[data['type'] == 'Action']['fight'],data[data['type'] == 'Action']['kiss'],color = 'g',marker = 'o',label = 'Action')
plt.grid()
plt.scatter(data2[data2['type'] == 'Romance']['fight'],data2[data2['type'] == 'Romance']['kiss'],color = 'r',marker = 'x',label = 'Romance')
plt.scatter(data2[data2['type'] == 'Action']['fight'],data2[data2['type'] == 'Action']['kiss'],color = 'g',marker = 'x',label = 'Action')
plt.legend()
plt.ylabel('kiss')
plt.xlabel('fight')