import csv
import random
# Load every row of the dataset as a dict keyed by the CSV header.
# newline='' lets the csv module handle line endings itself, so quoted
# fields that contain embedded newlines are parsed correctly.
with open('Prostate_Cancer.csv', 'r', newline='') as file:
    reader = csv.DictReader(file)
    datas = list(reader)

# Randomise the row order so the split below is not tied to file order.
random.shuffle(datas)

# Hold out the first third of the shuffled rows for testing and train on
# the remainder.
n = len(datas) // 3
test_set = datas[:n]
train_set = datas[n:]
def distance(
    d1: dict,
    d2: dict,
    keys=(
        "radius",
        "texture",
        "perimeter",
        "area",
        "smoothness",
        "compactness",
        "symmetry",
        "fractal_dimension",
    ),
) -> float:
    """Return the Euclidean distance between two records.

    Args:
        d1: Mapping from column name to a value accepted by ``float()``.
        d2: Mapping with the same columns as ``d1``.
        keys: Columns that take part in the distance. Defaults to the
            eight feature columns this script has always used.

    Returns:
        The square root of the summed squared per-column differences.

    Raises:
        KeyError: If a column in ``keys`` is missing from either record.
        ValueError: If a value cannot be converted to ``float``.
    """
    # Values are converted here because csv.DictReader yields strings.
    squared = sum((float(d1[key]) - float(d2[key])) ** 2 for key in keys)
    return squared ** 0.5
# Number of nearest training rows that take part in the vote.
k = 5


def knn(data):
    """Predict the diagnosis label for one record by weighted k-NN vote.

    The ``k`` training rows closest to ``data`` (by ``distance``) vote for
    their own ``diagnosis_result``. Each vote is weighted by
    ``1 - d / total``, where ``d`` is that row's distance and ``total`` is
    the summed distance of all selected rows, so closer rows count more.

    Args:
        data: Record to classify; must contain the columns ``distance``
            reads.

    Returns:
        ``'B'`` if the weighted ``'B'`` vote is strictly larger than the
        ``'M'`` vote, otherwise ``'M'`` (ties resolve to ``'M'``).

    Raises:
        KeyError: If a training row carries a label other than ``'B'`` or
            ``'M'``.
    """
    # (label, distance) pairs, ordered by distance only. The sort is stable,
    # so equally distant rows keep their training-set order.
    scored = sorted(
        ((train['diagnosis_result'], distance(data, train)) for train in train_set),
        key=lambda pair: pair[1],
    )
    nearest = scored[:k]
    total = sum(dist for _, dist in nearest)

    # The relative weighting degenerates in two cases: every selected row is
    # at distance 0 (division by zero), or only one row was selected (its
    # weight would be 1 - d/d = 0 and the vote would be lost). Fall back to
    # a plain majority vote there.
    uniform = total == 0 or len(nearest) == 1

    votes = {'B': 0, 'M': 0}
    for label, dist in nearest:
        votes[label] += 1 if uniform else 1 - dist / total

    return 'B' if votes['B'] > votes['M'] else 'M'
# Classify every held-out row and count how many predictions match the
# recorded diagnosis.
correct = sum(1 for test in test_set if test['diagnosis_result'] == knn(test))

# With fewer than three rows the test split is empty; report that instead
# of dividing by zero.
if test_set:
    print("准确率:{:.2f}%".format(100 * correct / len(test_set)))
else:
    print("测试集为空,无法计算准确率")
本文使用 KNN(k 近邻)算法对癌症的诊断结果进行预测。代码来自 B 站的 KNN 算法视频,数据集(Prostate_Cancer.csv)是提前准备好的。
代码最后的结果:
代码解释
random.shuffle 对数据集进行洗牌(随机打乱顺序),随后取前三分之一作为测试集,其余作为训练集。
distance 函数中的 for key 循环遍历 8 个特征列(radius、texture 等),累加两条记录在各特征上差值的平方,最后开平方得到欧氏距离。
将测试集中每条记录的预测结果与其真实诊断结果(diagnosis_result)进行比对,统计预测正确的数量并输出准确率。