KNN

 

 

主要思路:找到与待分类样本距离最近的 K 个点,每个点按距离贡献不同的权值,据此进行分类。

下面的代码预测每个样本的诊断结果是 B 还是 M,并统计预测的准确率。

测试数据:

id,diagnosis_result,radius,texture,perimeter,area,smoothness,compactness,symmetry,fractal_dimension
1,M,23,12,151,954,0.143,0.278,0.242,0.079
2,B,9,13,133,1326,0.143,0.079,0.181,0.057
3,M,21,27,130,1203,0.125,0.16,0.207,0.06
4,M,14,16,78,386,0.07,0.284,0.26,0.097
5,M,9,19,135,1297,0.141,0.133,0.181,0.059
6,B,25,25,83,477,0.128,0.17,0.209,0.076
7,M,16,26,120,1040,0.095,0.109,0.179,0.057
8,M,15,18,90,578,0.119,0.165,0.22,0.075
9,M,19,24,88,520,0.127,0.193,0.235,0.074
10,M,25,11,84,476,0.119,0.24,0.203,0.082
11,M,24,21,103,798,0.082,0.067,0.153,0.057
12,M,17,15,104,781,0.097,0.129,0.184,0.061
13,B,14,15,132,1123,0.097,0.246,0.24,0.078
14,M,12,22,104,783,0.084,0.1,0.185,0.053
15,M,12,13,94,578,0.113,0.229,0.207,0.077
16,M,22,19,97,659,0.114,0.16,0.23,0.071
17,M,10,16,95,685,0.099,0.072,0.159,0.059
18,M,15,14,108,799,0.117,0.202,0.216,0.074
19,M,20,14,130,1260,0.098,0.103,0.158,0.054
20,B,17,11,87,566,0.098,0.081,0.189,0.058
21,B,16,14,86,520,0.108,0.127,0.197,0.068
22,B,17,24,60,274,0.102,0.065,0.182,0.069
23,M,20,27,103,704,0.107,0.214,0.252,0.07
24,M,19,12,137,1404,0.094,0.102,0.177,0.053
25,M,9,13,110,905,0.112,0.146,0.2,0.063
26,M,19,27,116,913,0.119,0.228,0.304,0.074
27,M,10,24,97,645,0.105,0.187,0.225,0.069
28,M,16,24,122,1094,0.094,0.107,0.17,0.057
29,M,15,15,102,732,0.108,0.17,0.193,0.065
30,M,11,16,115,955,0.098,0.116,0.174,0.061
31,M,11,22,125,1088,0.106,0.189,0.218,0.062
32,M,23,26,78,441,0.111,0.152,0.23,0.078
33,M,20,18,113,899,0.12,0.15,0.225,0.064
34,M,11,21,128,1162,0.094,0.172,0.185,0.063
35,M,16,23,107,807,0.104,0.156,0.2,0.065
36,M,10,13,110,870,0.096,0.134,0.19,0.057
37,M,18,12,94,633,0.098,0.11,0.189,0.061
38,B,21,11,83,524,0.09,0.038,0.147,0.059
39,M,11,15,96,699,0.094,0.051,0.157,0.055
40,M,10,14,88,559,0.102,0.126,0.172,0.064
41,M,24,16,86,563,0.082,0.06,0.178,0.056
42,M,19,27,72,371,0.123,0.122,0.19,0.069
43,M,11,11,128,1104,0.091,0.219,0.231,0.063
44,M,15,21,87,545,0.104,0.144,0.197,0.068
45,M,10,15,85,532,0.097,0.105,0.175,0.062
46,M,18,11,124,1076,0.11,0.169,0.191,0.06
47,B,22,12,52,202,0.086,0.059,0.177,0.065
48,M,20,14,86,535,0.116,0.123,0.213,0.068
49,B,20,21,78,449,0.103,0.091,0.168,0.06
50,B,25,11,87,561,0.088,0.077,0.181,0.057
51,B,19,25,75,428,0.086,0.05,0.15,0.059
52,B,19,22,87,572,0.077,0.061,0.135,0.06
53,B,25,15,76,438,0.083,0.048,0.187,0.061
54,M,14,26,120,1033,0.115,0.149,0.209,0.063
55,M,18,25,97,713,0.091,0.071,0.162,0.057
56,B,18,13,73,409,0.095,0.055,0.192,0.059
57,M,10,19,126,1152,0.105,0.127,0.192,0.06
58,M,17,20,96,657,0.114,0.137,0.203,0.068
59,B,22,15,83,527,0.081,0.038,0.182,0.055
60,B,23,26,54,225,0.098,0.053,0.168,0.072
61,B,15,18,65,312,0.113,0.081,0.274,0.07
62,B,25,15,55,222,0.124,0.09,0.183,0.068
63,M,12,22,96,646,0.105,0.201,0.195,0.073
64,B,24,17,59,261,0.077,0.088,0.234,0.07
65,M,16,19,83,499,0.112,0.126,0.191,0.066
66,M,11,21,97,668,0.117,0.148,0.195,0.067
67,B,12,13,60,269,0.104,0.078,0.172,0.069
68,B,18,12,72,394,0.081,0.047,0.152,0.057
69,B,16,17,59,251,0.107,0.141,0.211,0.08
70,B,17,21,81,503,0.098,0.052,0.159,0.057
71,M,21,18,124,1130,0.09,0.103,0.158,0.055
72,B,9,26,59,244,0.098,0.153,0.19,0.09
73,M,21,12,114,929,0.107,0.183,0.193,0.065
74,M,22,25,90,584,0.101,0.128,0.166,0.066
75,B,18,13,79,471,0.092,0.068,0.172,0.059
76,M,21,18,104,818,0.092,0.084,0.18,0.054
77,B,10,17,88,559,0.129,0.105,0.24,0.066
78,M,11,21,120,1006,0.107,0.215,0.215,0.067
79,M,16,18,144,1245,0.129,0.345,0.291,0.081
80,B,22,16,83,506,0.099,0.095,0.172,0.06
81,B,10,18,74,402,0.11,0.094,0.184,0.07
82,B,17,21,86,520,0.108,0.154,0.194,0.069
83,M,10,15,172,1878,0.106,0.267,0.183,0.068
84,M,20,14,129,1132,0.122,0.179,0.163,0.072
85,B,25,21,77,443,0.097,0.072,0.208,0.06
86,M,14,13,121,1075,0.099,0.105,0.213,0.06
87,M,19,26,94,648,0.094,0.099,0.208,0.056
88,M,19,11,122,1076,0.09,0.121,0.195,0.056
89,B,11,11,80,466,0.088,0.094,0.193,0.064
90,B,12,23,96,652,0.113,0.134,0.212,0.063
91,B,23,27,95,663,0.09,0.086,0.169,0.059
92,M,10,12,100,728,0.092,0.104,0.172,0.061
93,B,14,14,85,552,0.074,0.051,0.139,0.053
94,B,10,17,87,555,0.102,0.082,0.164,0.057
95,M,22,26,100,706,0.104,0.155,0.186,0.063
96,M,23,16,132,1264,0.091,0.131,0.21,0.056
97,B,22,14,78,451,0.105,0.071,0.19,0.066
98,B,19,27,62,295,0.102,0.053,0.135,0.069
99,B,21,24,74,413,0.09,0.075,0.162,0.066
100,M,16,27,94,643,0.098,0.114,0.188,0.064

 

代码为:

import csv
import random
# Load the data set: every CSV row becomes a dict keyed by the header names.
# newline="" leaves newline handling to the csv module, as its documentation
# recommends for any file object handed to a reader.
with open("Prostate_Cancer.csv", "r", newline="") as file:
    reader = csv.DictReader(file)
    datas = list(reader)

# Split the rows: the first third is the test set, the rest is the training set.
# Shuffling is left disabled, so the split is the same on every run.
# random.shuffle(datas)
n = len(datas) // 3
test_set = datas[0:n]
train_set = datas[n:]

# Distance between two records
def distance(d1, d2):
    """Return the Euclidean distance between two records.

    Both arguments are mappings that hold the eight feature columns listed
    below; each value is converted with ``float()`` before use. Any other
    keys in the mappings are ignored.
    """
    features = (
        "radius", "texture", "perimeter", "area",
        "smoothness", "compactness", "symmetry", "fractal_dimension",
    )
    squared_total = 0
    for name in features:
        gap = float(d1[name]) - float(d2[name])
        squared_total += gap ** 2
    return squared_total ** 0.5

# KNN
K = 5  # default number of nearest neighbours that take part in the vote


def knn(data, train=None, k=None, metric=None):
    """Classify ``data`` as 'B' or 'M' with a distance-weighted k-NN vote.

    Args:
        data: the record to classify; it is passed to ``metric`` unchanged.
        train: iterable of training records, each with a
            'diagnosis_result' key whose value is 'B' or 'M'. Defaults to
            the module-level ``train_set``.
        k: number of nearest neighbours to use. Defaults to the
            module-level ``K``.
        metric: callable ``metric(data, record)`` returning a distance.
            Defaults to the module-level ``distance``.

    Returns:
        'B' if the summed weight of the 'B' neighbours is strictly greater
        than that of the 'M' neighbours, otherwise 'M' (ties and an empty
        training set therefore yield 'M').

    The per-label weights are also printed, as a trace of each decision.
    """
    if train is None:
        train = train_set
    if k is None:
        k = K
    if metric is None:
        metric = distance

    # 1. Distance from the query record to every training record.
    scored = [
        {"result": sample['diagnosis_result'], "distance": metric(data, sample)}
        for sample in train
    ]
    # 2. Order by distance. sorted() returns a new list and leaves its
    #    argument untouched, so the result has to be kept; otherwise the
    #    "nearest" neighbours are merely the first k training records.
    scored = sorted(scored, key=lambda item: item['distance'])
    # 3. Keep the k closest records.
    nearest = scored[0:k]
    # 4. Weighted vote: a neighbour contributes 1 - d / total, so closer
    #    neighbours weigh more.
    votes = {"B": 0, "M": 0}
    total = sum(item['distance'] for item in nearest)
    for item in nearest:
        # total == 0 means every selected neighbour is at distance 0 from
        # the query; give each full weight instead of dividing by zero.
        weight = 1 - item['distance'] / total if total else 1
        votes[item['result']] += weight
    print(votes)
    if votes['B'] > votes['M']:
        return 'B'
    return 'M'

# Score the classifier: compare each test record's true label with the
# label knn() predicts for it, then report the share of matches.
correct = 0
for test in test_set:
    expected = test['diagnosis_result']
    predicted = knn(test)
    correct += 1 if expected == predicted else 0
accuracy = correct * 100 / len(test_set)
print("准确率:{:.2f}%".format(accuracy))
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值