# -*- coding: utf-8 -*-
"""
Created on Wed May 11 22:52:29 2016

@author: sanmao
"""
import csv
import math
import operator
import random

def loadDataset(filename, split, trainingSet=None, testSet=None):
    """Read a CSV file and randomly partition its rows into two lists.

    Every column except the last is converted to ``float``; the last column
    is kept as the string read from the file (the code elsewhere in this
    file reads ``row[-1]`` as the class label).

    Args:
        filename: Path of the CSV file to read.
        split: Probability in [0, 1] that a row goes to ``trainingSet``;
            otherwise it goes to ``testSet``.
        trainingSet: Optional list that receives training rows (appended
            in place). A fresh list is created when omitted.
        testSet: Optional list that receives test rows (appended in
            place). A fresh list is created when omitted.

    Returns:
        The tuple ``(trainingSet, testSet)``, i.e. the same list objects
        that were passed in (or the freshly created ones).

    Raises:
        ValueError: If a feature column cannot be parsed as a float.
    """
    # Fresh lists per call: a ``[]`` default would be shared across calls
    # and silently accumulate rows.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # The csv module expects a text-mode file opened with newline=''.
    with open(filename, newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Blank lines (e.g. a trailing empty line at the end of the
            # file) come back as empty rows; skip them instead of blindly
            # dropping the last row.
            if not row:
                continue
            row[:-1] = [float(value) for value in row[:-1]]
            if random.random() < split:
                trainingSet.append(row)
            else:
                testSet.append(row)

    return trainingSet, testSet
def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance over the first ``length`` components.

    Args:
        instance1: Indexable sequence of numbers (extra trailing items,
            such as a label, are ignored).
        instance2: Indexable sequence of numbers, same layout as
            ``instance1``.
        length: Number of leading components to compare.

    Returns:
        The distance as a float; ``0.0`` when ``length`` is 0.

    Raises:
        IndexError: If either instance has fewer than ``length`` items.
    """
    squaredSum = sum(
        (instance1[i] - instance2[i]) ** 2 for i in range(length)
    )
    return math.sqrt(squaredSum)
def getNeighbors(trainingSet, testInstance, k):
    """Return the ``k`` training instances closest to ``testInstance``.

    Distances are computed with ``euclideanDistance`` over every component
    except the last one of ``testInstance`` (``len(testInstance) - 1``).

    Args:
        trainingSet: Iterable of training instances.
        testInstance: The instance to find neighbours for.
        k: Maximum number of neighbours to return.

    Returns:
        A list of at most ``k`` training instances ordered from nearest
        to farthest. Fewer than ``k`` are returned when the training set
        is smaller than ``k``; an empty list when ``k`` is not positive.
    """
    length = len(testInstance) - 1
    distances = [
        (trainInstance, euclideanDistance(testInstance, trainInstance, length))
        for trainInstance in trainingSet
    ]
    # list.sort is stable, so equidistant instances keep training-set order.
    distances.sort(key=operator.itemgetter(1))
    # max(k, 0) keeps a negative k from turning into a "drop the last
    # items" slice.
    return [instance for instance, _ in distances[:max(k, 0)]]
def getResponse(neighbors):
    """Return the most common class label among ``neighbors``.

    The label of each neighbour is its last element. When several labels
    tie for the highest count, the one that appears first in ``neighbors``
    wins.

    Args:
        neighbors: Non-empty sequence of instances.

    Returns:
        The winning label.

    Raises:
        ValueError: If ``neighbors`` is empty.
    """
    if not neighbors:
        raise ValueError('neighbors must not be empty')

    classVotes = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        classVotes[label] = classVotes.get(label, 0) + 1

    # max() returns the first maximal key in iteration order, and dicts
    # iterate in insertion order, giving the first-seen tie-break above.
    return max(classVotes, key=classVotes.get)
def getAccuracy(testSet, predictions):
    """Return the percentage of test instances that were predicted correctly.

    Args:
        testSet: Sequence of instances whose last element is the actual
            label.
        predictions: Sequence of predicted labels, parallel to
            ``testSet``.

    Returns:
        Accuracy as a float in the range 0.0-100.0; ``0.0`` for an empty
        ``testSet`` (avoids dividing by zero).

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    if not testSet:
        return 0.0
    correct = sum(
        1
        for index, instance in enumerate(testSet)
        if instance[-1] == predictions[index]
    )
    return (correct / float(len(testSet))) * 100.0


def main():
    """Load the data set, classify every test instance and print accuracy."""
    trainingSet = []
    testSet = []
    split = 0.67
    # NOTE(review): the path 'D' looks truncated (possibly the start of a
    # Windows path) -- confirm the intended data file location.
    loadDataset(r'D', split, trainingSet, testSet)
    print('Train set:' + repr(len(trainingSet)))
    print('Test set:' + repr(len(testSet)))

    predictions = []
    k = 3
    for testInstance in testSet:
        neighbors = getNeighbors(trainingSet, testInstance, k)
        result = getResponse(neighbors)
        predictions.append(result)
        print('>predicted=' + repr(result) + ',actual=' + repr(testInstance[-1]))

    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy:' + repr(accuracy) + '%')


# Run only when executed as a script, so importing the module has no side
# effects.
if __name__ == '__main__':
    main()
1. # -*- coding: utf-8 -*- """ Created on Wed May 11 22:52:29 2016 @author: sanmao """ import random import math import operator def loadDataset(filename,split,trainingSet=[],testSet=[]):