# A simple KNN (k-nearest neighbours) implementation; if you spot any problems, corrections and additions are welcome.
# -*- coding:utf-8 -*-
'''
python3.6
简单的k邻近算法
'''
from numpy import *
# 抛出异常
def throw_error(err):
    """Raise a generic ``Exception`` built from ``err``.

    Args:
        err: Message (or any other payload) passed to the ``Exception``
            constructor.

    Raises:
        Exception: Always.
    """
    failure = Exception(err)
    raise failure
# 字典按value进行排序
def sortedDictValue(dict, reverse=False):
    """Return the ``(key, value)`` pairs of a mapping ordered by value.

    Args:
        dict: Mapping whose items are sorted, or ``None``.
        reverse: When true, order from the largest value to the smallest.

    Returns:
        A new list of ``(key, value)`` tuples; an empty list when ``dict``
        is ``None``.
    """
    if dict is None:
        return []

    def by_value(pair):
        return pair[1]

    pairs = list(dict.items())
    # list.sort is stable, so items with equal values keep the mapping's
    # iteration order in both directions.
    pairs.sort(key=by_value, reverse=reverse)
    return pairs
# 创建数据集
def createDataSet():
    """Build the small two-class sample set used by the demo.

    Returns:
        A ``(group, labels)`` tuple. ``group`` is a list of six two-element
        points and ``labels`` is a list holding the class ('A' or 'B') of
        the point at the same position.
    """
    class_a = [[3, 104], [2, 100], [1, 81]]
    class_b = [[101, 10], [99, 5], [98, 2]]
    group = class_a + class_b
    labels = ['A'] * len(class_a) + ['B'] * len(class_b)
    return group, labels
# k邻近算法
def classifyKNN(vectorX, dataSet, labels, k):
    """Classify ``vectorX`` by majority vote among its k nearest neighbours.

    Neighbours are ranked by Euclidean distance to ``vectorX``. Training rows
    at exactly the same distance are ranked by their row index, so the result
    is deterministic.

    Args:
        vectorX: Feature values of the sample to classify, one per column of
            ``dataSet`` (sequence or array).
        dataSet: Training samples, one per row. Any two-dimensional
            array-like is accepted (nested list, ``ndarray``, ``matrix``).
        labels: List holding the class label of each training row, in row
            order. Labels must be hashable.
        k: Number of neighbours that vote. Values larger than the number of
            training rows are capped to that number.

    Returns:
        The label with the most votes among the k nearest rows. When several
        labels share the top count, the one that received its first vote
        from the closest neighbour wins.

    Raises:
        TypeError: If ``labels`` is not a list or ``k`` is not an int.
        ValueError: If ``k`` is smaller than 1, ``dataSet`` is empty or not
            two-dimensional, or there are fewer labels than training rows.
            Both types are ``Exception`` subclasses, so existing
            ``except Exception`` handlers keep working.
    """
    from collections import Counter

    # Validate arguments up front so bad input fails with a clear message
    # instead of an IndexError deep inside the vote counting.
    if not isinstance(labels, list):
        raise TypeError("labels必须为list数组")
    if not isinstance(k, int):
        raise TypeError("k参数必须为整数")
    if k < 1:
        raise ValueError("k必须为正整数")

    # asarray accepts nested lists as well as arrays and turns a matrix into
    # a plain ndarray, so the row sums below are always one-dimensional.
    samples = asarray(dataSet, dtype=float)
    if samples.ndim != 2 or samples.shape[0] == 0:
        raise ValueError("dataSet必须为非空的二维数据集")

    dataSize = samples.shape[0]
    if len(labels) < dataSize:
        raise ValueError("labels的数量不能少于训练集的行数")
    k = dataSize if k > dataSize else k

    # Euclidean distance from the sample to every training row; broadcasting
    # subtracts the sample from each row without building a tiled copy.
    diff = samples - asarray(vectorX, dtype=float)
    distances = sqrt((diff ** 2).sum(axis=1))

    # A stable sort keeps equidistant rows in index order.
    nearest = argsort(distances, kind="stable")[:k]

    # Counter preserves first-seen order, so most_common resolves ties in
    # favour of the label seen first, i.e. the one nearest to the sample.
    votes = Counter(labels[index] for index in nearest)
    return votes.most_common(1)[0][0]
def main():
    """Run the demo: classify the point ``[0, 1]`` against the sample set.

    Prints the label returned by ``classifyKNN`` with ``k=3``, or the
    exception message if the classification fails.
    """
    samples, sample_labels = createDataSet()
    query = [0, 1]
    try:
        predicted = classifyKNN(query, samples, sample_labels, 3)
        print(predicted)
    except Exception as exc:
        # Demo entry point: report the failure instead of showing a traceback.
        print(exc)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()