# K Nearest Neighbor Classifier for the Pima dataset
importheapqimportrandomclassClassifier:def __init__(self, bucketPrefix, testBucketNumber, dataFormat, k):"""a classifier will be built from files with the bucketPrefix
excluding the file with testBucketNumber. dataFormat is a string that
describes how to interpret each line of the data files. For example,
for the mpg data the format is:
"class num num num num num comment""""self.medianAndDeviation=[]
self.k=k#reading the data in from the file
self.format= dataFormat.strip().split('\t')
self.data=[]#for each of the buckets numbered 1 through 10:
for i in range(1, 11):#if it is not the bucket we should ignore, read in the data
if i !=testBucketNumber:
filename= "%s-%02i" %(bucketPrefix, i)
f=open(filename)
lines=f.readlines()
f.close()for line in lines[1:]:
fields= line.strip().split('\t')
ignore=[]
vector=[]for i inrange(len(fields)):if self.format[i] == 'num':
vector.append(float(fields[i]))elif self.format[i] == 'comment':
ignore.append(fields[i])elif self.format[i] == 'class':
classification=fields[i]
self.data.append((classification, vector, ignore