机器学习实战-K近邻算法(分类电影)
# -*- coding: utf-8 -*-
# Author:LHF Time:2019/9/21
from numpy import *
import operator #运算符模块
def creatDataSet():
    """Return the small hard-coded training set used by the k-NN demo.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 NumPy array
        holding one sample per row and ``labels`` is a list with the class
        label of each row, in the same order.
    """
    # NOTE(review): the meaning of the two feature columns is not stated in
    # this file (presumably per-movie scene counts) -- confirm with the author.
    group = array([[1.0, 101], [5, 89], [108, 5], [115, 8]])
    # '爱情片' = romance film, '动作片' = action film.
    labels = ['爱情片', '爱情片', '动作片', '动作片']
    return group, labels
def classify0(inx, dataSet, labels, k):
    """Classify ``inx`` by majority vote among its ``k`` nearest samples.

    Distances are Euclidean and are computed between ``inx`` and every row
    of ``dataSet``.

    Args:
        inx: Feature vector to classify (sequence or array with one value
            per column of ``dataSet``).
        dataSet: Training samples, one per row (2-D array or nested
            sequence).
        labels: Class label of each row of ``dataSet``, in row order.
        k: Number of nearest neighbours that take part in the vote. Must
            satisfy ``1 <= k <= number of rows in dataSet``.

    Returns:
        The label with the most votes among the ``k`` nearest samples. When
        several labels share the highest count, the one whose first vote
        came from the closest neighbour is returned.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            training samples.
    """
    # Convert to float so integer (especially unsigned) inputs cannot wrap
    # around when the difference is taken.
    samples = asarray(dataSet, dtype=float)
    query = asarray(inx, dtype=float)

    sample_count = samples.shape[0]
    if not 1 <= k <= sample_count:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (sample_count, k)
        )

    # Broadcasting subtracts the query from every row, so no explicit
    # tiling of the query vector is needed.
    diff = samples - query
    distances = (diff ** 2).sum(axis=1) ** 0.5

    # A stable sort keeps equidistant samples in their original row order,
    # which makes the selected neighbours deterministic.
    nearest = distances.argsort(kind="stable")[:k]

    # Majority vote: count how often each label occurs among the neighbours.
    votes = {}
    for row_index in nearest:
        label = labels[row_index]
        votes[label] = votes.get(label, 0) + 1

    # Sort (label, count) pairs by count, highest first; sorted() is stable,
    # so tied labels keep the order in which they were first voted for.
    ranked = sorted(votes.items(), key=operator.itemgetter(1), reverse=True)
    return ranked[0][0]
if __name__ == '__main__':
    # Demo run: classify one sample point against the built-in data set
    # using its 3 nearest neighbours and print the predicted label.
    group, labels = creatDataSet()
    test = [101, 20]
    test_class = classify0(test, group, labels, 3)
    print(test_class)
**运行结果**
动作片