试编程实现 Relief 算法，并考察其在西瓜数据集 3.0 上的运行结果。
# coding: utf-8
import numpy as np
# Load the data set, split it into attributes / labels, and rescale the two
# trailing numeric attribute columns.
input_path = "西瓜数据集3.csv"
# On Python 2 the literal above is a byte string and is decoded to unicode
# before being handed to open(); on Python 3 it is already text and has no
# .decode() method, so it is used as-is.
if isinstance(input_path, bytes):
    _decoded_path = input_path.decode('utf-8')
else:
    _decoded_path = input_path
# The with-statement closes the handle as soon as the rows have been read
# (previously it was left open). The name `file` stays bound afterwards.
with open(_decoded_path) as file:
    # Blank lines are skipped: they would otherwise yield an empty row that
    # breaks the normalisation loop below. rstrip also removes a stray '\r'.
    filedata = [
        line.rstrip('\r\n').split(',') for line in file if line.strip()
    ]
# change decimal from string to float: any field containing '.' is converted,
# every other field stays a string. '.' is ASCII, so no decoding is needed
# to test for it.
filedata = [[float(i) if '.' in i else i for i in row] for row in filedata]
filedata = filedata[1:]  # drop the first row (presumably the CSV header)
X = [row[1:-1] for row in filedata]  # attributes: all but first and last column
Y = [row[-1] for row in filedata]  # class label: last column
weight = np.zeros(len(X[0]))  # one weight per attribute, initialised to 0
# Normalise the last two attribute columns with (x - lo) / (hi - lo).
# NOTE(review): the constants are presumably the min / max of each column in
# this particular data set -- confirm against the CSV.
for row in X:
    row[-2] = (row[-2] - 0.243) / (0.774 - 0.243)
    row[-1] = (row[-1] - 0.042) / (0.46 - 0.042)
def cal_dis(a, b):
    """Return the distance between two samples with mixed attribute types.

    The distance is the sum of per-attribute differences:

    * if the attribute value in ``a`` is a float, the term is ``|a[i] - b[i]|``;
    * otherwise the term is 0 when the two values are equal and 1 when not.

    Args:
        a: sequence of attribute values for the first sample.
        b: sequence of attribute values for the second sample; must be at
            least as long as ``a``.

    Returns:
        The accumulated distance (``0`` for empty input).

    Raises:
        IndexError: if ``b`` is shorter than ``a``.
    """
    ret = 0
    for i, ai in enumerate(a):
        bi = b[i]
        # isinstance (rather than an exact type comparison) so that float
        # subclasses such as numpy.float64 are also treated as continuous
        # values instead of falling through to the 0/1 branch.
        if isinstance(ai, float):
            ret += np.abs(ai - bi)
        else:
            ret += 0 if ai == bi else 1
    return ret
def fin