KNN模型调用
sklearn 包中封装了 KNN 模型,可以直接传入数据集进行调用。
本文使用 iris 数据集,分别调用 sklearn 的 knn 模块和自己编写的 knn 模块,并对结果进行分析。下面给出代码。
knn调用
def iris_knn():
    """Classify one iris sample with scikit-learn's k-nearest-neighbours model.

    Side effects:
        * Writes ``iris.data.csv`` to the current working directory. Each
          line holds the feature values of one sample followed by its
          integer class label, all comma separated.
        * Prints the loaded dataset, its target names, its feature names and
          the predicted class for one hard-coded sample.
    """
    # Local imports: in this file scikit-learn is only used by this function.
    from sklearn import neighbors
    from sklearn import datasets

    knn = neighbors.KNeighborsClassifier()
    # load_iris() reads the copy of the dataset bundled with scikit-learn.
    iris = datasets.load_iris()

    # Save the dataset locally as CSV.
    with open('iris.data.csv', 'w') as f:
        for features, label in zip(iris.data, iris.target):
            # Iterate over the feature values themselves; the previous
            # ``range(data[i])`` passed a whole row to range() and failed.
            fields = [str(value) for value in features]
            fields.append(str(label))
            f.write(','.join(fields) + "\n")

    print(iris)
    print(iris.target_names)
    print(iris.feature_names)

    knn.fit(iris.data, iris.target)  # train
    predict = knn.predict([[5.9, 3, 5.1, 1.8]])  # predict a single sample
    print(predict)
my knn
import csv
import random
import math
import operator
def load_data(filename, split):
    """Read a CSV dataset and randomly split it into training and test sets.

    Each non-empty row is expected to hold four numeric feature columns
    followed by a class label. The four features are converted to ``float``;
    the label is kept as the string read from the file.

    Args:
        filename: Path of the CSV file to read.
        split: Probability (0..1) that a row goes into the training set;
            the remaining rows go into the test set.

    Returns:
        A ``(train_set, test_set)`` tuple of lists of rows.

    Raises:
        ValueError: If one of the first four columns is not numeric.
        IndexError: If a non-empty row has fewer than four columns.
    """
    train_set = []
    test_set = []
    with open(filename, 'r', newline='') as f:
        for row in csv.reader(f):
            # Blank lines (e.g. a trailing one at the end of the file) come
            # back as empty rows. Skipping them replaces the old
            # ``len(data_set) - 1`` loop bound, which dropped the last real
            # sample whenever the file had no trailing blank line.
            if not row:
                continue
            for col in range(4):
                row[col] = float(row[col])
            if random.random() < split:
                train_set.append(row)
            else:
                test_set.append(row)
    return train_set, test_set
def get_distance(vec1, vec2):
    """Return the Euclidean distance between two points.

    The coordinates are indexed by the length of ``vec1``; any extra trailing
    entries in ``vec2`` are ignored.
    """
    squared_sum = 0
    idx = 0
    dims = len(vec1)
    while idx < dims:
        delta = vec1[idx] - vec2[idx]
        squared_sum += delta ** 2
        idx += 1
    return math.sqrt(squared_sum)
def get_neighbors(train_set, testcase, k):
    """Return the ``k`` training rows closest to ``testcase``.

    Both the training rows and ``testcase`` are expected to carry their class
    label as the last element; every element before it is treated as a
    feature coordinate.

    Args:
        train_set: List of training rows (features followed by a label).
        testcase: Row to find neighbours for (features followed by a label).
        k: Number of neighbours wanted.

    Returns:
        Up to ``k`` training rows ordered from nearest to farthest. Fewer
        rows are returned when the training set holds fewer than ``k`` rows.
    """
    # Strip only the trailing label. The previous ``[0:-2]`` slice also
    # discarded the last feature, so distances ignored one dimension.
    target = testcase[:-1]
    distances = [(case, get_distance(case[:-1], target)) for case in train_set]
    distances.sort(key=operator.itemgetter(1))
    # Slicing (instead of indexing range(k)) tolerates k > len(train_set);
    # max() keeps a negative k yielding an empty result, as before.
    return [case for case, _ in distances[:max(k, 0)]]
def get_response(neighbors):
    """Return the majority class label among the given neighbour rows.

    The label is taken from the last element of each row. When several
    labels share the highest count, the one seen first wins.
    """
    tally = {}
    for row in neighbors:
        label = row[-1]
        tally[label] = tally.get(label, 0) + 1
    # Stable descending sort by vote count keeps first-seen order for ties.
    ranked = list(tally.items())
    ranked.sort(key=operator.itemgetter(1), reverse=True)
    winner = ranked[0]
    return winner[0]
def get_accuracy(test_set, predictions):
    """Return the fraction of test rows whose label matches its prediction.

    The true label is the last element of each test row; the result is the
    number of matches divided by the number of predictions.
    """
    hits = sum(
        1
        for idx, row in enumerate(test_set)
        if row[-1] == predictions[idx]
    )
    total = float(len(predictions))
    return hits / total
def my_knn(filename='./iris.data.csv', split=0.66, k=3):
    """Run the hand-written k-nearest-neighbours classifier and print results.

    Loads the dataset, prints the training and test sets, predicts a class
    for every test row, and prints each prediction followed by the overall
    accuracy.

    Args:
        filename: CSV file to load; defaults to the previously hard-coded
            ``./iris.data.csv``.
        split: Probability that a row is placed in the training set.
        k: Number of neighbours that vote on each prediction.
    """
    train_set, test_set = load_data(filename, split)
    print(train_set)
    print(test_set)

    # The split is random, so either set can come back empty. Voting needs at
    # least one training row and the accuracy needs at least one prediction.
    if not train_set or not test_set:
        print('Nothing to evaluate: training or test set is empty')
        return

    predictions = []
    for case in test_set:
        neighbors = get_neighbors(train_set, case, k)
        result = get_response(neighbors)
        predictions.append(result)
        print('>>> predict = ' + str(result) + ', actul = ' + str(case[-1]))
    accuracy = get_accuracy(test_set, predictions)
    print('Accuracy : ' + str(accuracy))
if __name__ == "__main__":
    # Uncomment to run the scikit-learn demo; it also writes the
    # iris.data.csv file that my_knn() reads.
    # iris_knn()
    my_knn()