#!/usr/bin/python
# coding=utf-8
from numpy import *
import operator
import pandas as pd
import sys
import KNN
sys.path.append(r'C:/Users/Documents/6、play/')
import xlwt
## Load the training labels, the training set and the test set:
def load_data(label_path='label.csv', dataset_path='dataset.csv', test_path='test.csv'):
    """Read the label, training and test CSV files (all without a header row).

    Args:
        label_path: CSV file holding the training labels.
        dataset_path: CSV file holding the training samples.
        test_path: CSV file holding the samples to classify.

    The defaults are the file names the script has always used, resolved
    against the current working directory.

    Returns:
        A ``(label, dataset, test)`` tuple. ``label`` is a flat list of every
        cell of the label file in row-major order; ``dataset`` and ``test``
        are the ``.values`` arrays of the corresponding files.
    """
    label_rows = pd.read_csv(label_path, header=None).values
    # Flatten row by row so the labels form a plain 1-D list regardless of
    # whether the file is laid out as a single column or a single row.
    label = [value for row in label_rows for value in row]
    dataset = pd.read_csv(dataset_path, header=None).values
    test = pd.read_csv(test_path, header=None).values
    return label, dataset, test
def kNNClassify(newInput, dataSet, labels, k):
    """Classify ``newInput`` by majority vote among its ``k`` nearest samples.

    Distance is the Euclidean distance between ``newInput`` and each row of
    ``dataSet``.

    Args:
        newInput: Feature vector to classify; must broadcast against the rows
            of ``dataSet``.
        dataSet: 2-D array-like with one training sample per row.
        labels: Sequence where ``labels[i]`` is the (hashable) label of row
            ``i`` of ``dataSet``.
        k: Number of neighbours that vote. Values larger than the number of
            samples are clamped to the number of samples.

    Returns:
        The label with the most votes among the ``k`` nearest samples. On a
        tied vote, the label whose neighbour appears first in nearest-first
        order wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``dataSet`` has no rows.
    """
    # Imported locally so the function does not depend on the module-level
    # ``from numpy import *`` shadowing the builtin ``sum``.
    import numpy as np

    dataSet = np.asarray(dataSet)
    numSamples = dataSet.shape[0]  # number of rows
    if numSamples == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    # Asking for more neighbours than exist used to raise IndexError.
    k = min(k, numSamples)

    # Broadcasting subtracts newInput from every row; no explicit tile needed.
    diff = np.asarray(newInput) - dataSet
    distance = np.sum(diff ** 2, axis=1) ** 0.5
    nearest = np.argsort(distance)[:k]

    # Tally the votes in nearest-first order.
    classCount = {}
    for idx in nearest:
        voteLabel = labels[idx]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # max() returns the first key with the highest count, which keeps the
    # original first-seen tie-breaking.
    return max(classCount, key=classCount.get)
# Classify every test sample with k=3, print each prediction and store the
# predictions in the first column of sheet 'test' in out.xls.
book = xlwt.Workbook(encoding='utf-8', style_compression=0)
sheet = book.add_sheet('test', cell_overwrite_ok=True)
label, dataset, test = load_data()
for row, sample in enumerate(test):
    outputLabel = kNNClassify(sample, dataset, label, 3)
    n = row + 1  # 1-based sample number shown on the console
    print('第 ',n,'个为: ',str(outputLabel))
    # One row per sample. Writing to (0, 0) every time overwrote the same
    # cell, so the saved workbook only ever held the last prediction.
    # NOTE(review): the .xls format has a limited row count - confirm the
    # test set fits on one sheet.
    sheet.write(row, 0, str(outputLabel))
book.save('out.xls')