使用Python实现KNN算法

我想我明白

于 2020-05-30 21:26:46 发布

阅读量1.2k

点赞数 1

文章标签： python 算法 机器学习

本文链接：https://blog.csdn.net/wxwmb11/article/details/106447279

版权

使用Python实现KNN算法

实验名称
算法实现
实现结果

实验名称

实现KNN算法，使用UCI公开数据集 Heart Disease 中的 processed.cleveland.data 文件。
该文件包含14个属性（“#”后的数字为该属性在原始数据库中的编号），属性信息如下:

#3 (age)
#4 (sex)
#9 (cp)
#10 (trestbps)
#12 (chol)
#16 (fbs)
#19 (restecg)
#32 (thalach)
#38 (exang)
#40 (oldpeak)
#41 (slope)
#44 (ca)
#51 (thal)
#58 (num) (the predicted attribute)
数据集参考网址：https://archive.ics.uci.edu/ml/datasets/Heart+Disease

算法实现

训练数据集的读取以及KNN函数的编写

import numpy as np
import operator
# Load the processed Cleveland heart-disease data: one comma-separated record
# per line, 13 feature columns followed by the class label (num).
# NOTE(review): the public UCI copy of this file marks missing values with
# '?', which float() rejects -- presumably this local copy was cleaned
# beforehand; confirm before running against a fresh download.
rows = []
# `with` guarantees the file handle is closed even if parsing fails.
with open('D:/Tencent/QQ/qq文件/WeChat Files/w384660107/FileStorage/File/2020-05/processed.cleveland.data', encoding='utf-8') as f:
    for d in f:
        d = d.strip()
        if not d:
            # Skip blank lines (e.g. a trailing newline at end of file).
            continue
        # Every field must parse as a float; only the first 14 are kept.
        rows.append([float(v) for v in d.split(',')][:14])

# Build the whole table in one step instead of growing arrays with np.append
# inside a loop, and take the row count from the data rather than assuming
# exactly 303 records.
data = np.array(rows, dtype=float).reshape(-1, 14)

# Per-attribute columns, kept as plain lists under their original names.
(age, sex, cp, tresbps, chol, fbs, restecg, thalach,
 exang, oldpeak, slope, ca, thal, num) = data.T.tolist()

group = data[:, :13]   # feature matrix, shape (n_samples, 13)
labels = data[:, 13:]  # class labels as a column, shape (n_samples, 1)
print(group.shape)
print(labels.shape)
print("我想我明白12345229")
def kNN_Classify(inX, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean. Ties between classes are resolved in favour of
    the class whose first vote came from the nearer neighbour.

    Args:
        inX: Feature vector of the sample to classify, length n_features.
        dataSet: Training samples, array-like of shape (n_samples, n_features).
        labels: Class label of each training sample; either a flat sequence
            of length n_samples or a column of shape (n_samples, 1).
        k: Number of nearest neighbours that vote. Values larger than the
            number of training samples are clamped to that number.

    Returns:
        The label (an element of ``labels``) that received the most votes.

    Raises:
        ValueError: If ``dataSet`` is empty, ``labels`` does not hold one
            label per sample, or ``k`` is smaller than 1.
    """
    samples = np.asarray(dataSet, dtype=float)
    dataSetSize = samples.shape[0]
    if dataSetSize == 0:
        raise ValueError("dataSet must contain at least one sample")

    # Flatten so that a (n_samples, 1) label column yields hashable scalars
    # rather than 1-element arrays, which cannot be used as dict keys.
    flatLabels = np.asarray(labels).ravel()
    if flatLabels.shape[0] != dataSetSize:
        raise ValueError("labels must contain exactly one label per sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    k = min(k, dataSetSize)

    # Broadcasting subtracts inX from every row of the training set.
    diffMat = samples - np.asarray(inX, dtype=float)
    # Sum the squared differences per row (axis=1) to get one distance per
    # training sample; the builtin sum() would add the rows together instead.
    distances = np.sqrt((diffMat ** 2).sum(axis=1))
    # argsort returns the indices that order the distances ascending; a
    # stable sort keeps equidistant samples in their original order.
    sortedDistIndicies = np.argsort(distances, kind="stable")

    # Count how often each class occurs among the k nearest neighbours.
    classCount = {}
    for neighbourIndex in sortedDistIndicies[:k]:
        voteLabel = flatLabels[neighbourIndex]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # Return the class with the most votes; max() keeps the first maximal
    # key in insertion order, i.e. the one first seen nearest to inX.
    return max(classCount, key=classCount.get)

KNN函数的调用以及对目标样本的分类

import numpy as np
# Classify a single hand-written sample against the loaded training data.
dataSet=group
# The 13 feature values, in the same column order used to build `group`.
test=np.array([61.0,1.0,1.0,134.0,234.0,0.0,0.0,145.0,0.0,2.6,2.0,2.0,3.0])
# Number of neighbours that vote; passed through to the classifier so that
# changing it here actually takes effect (the literal 1 was hard-coded before).
k=1
outputLabel = kNN_Classify(test, dataSet, labels, k)
print("你输入的样本我想我明白925", test, "wsx心血管病的类别为我想我明白123 ", outputLabel)