# 第1关:实现kNN算法 (Stage 1: implement the kNN algorithm)
#encoding=utf8
import numpy as np
class kNNClassifier(object):
    '''
    k-nearest-neighbours classifier using Euclidean distance and majority vote.
    '''

    def __init__(self, k):
        '''
        Create an unfitted classifier.

        :param k: number of nearest training samples that vote on a prediction
        '''
        self.k = k
        # Training samples (ndarray); stays None until fit() is called.
        self.train_feature = None
        # Training labels (ndarray), index-aligned with train_feature.
        self.train_label = None

    def fit(self, feature, label):
        '''
        Store the training set; kNN does all of its work at prediction time.

        :param feature: training samples, ndarray (one sample per row)
        :param label: training labels, ndarray (one label per sample)
        :return: None
        '''
        #********* Begin *********#
        self.train_feature = np.array(feature)
        self.train_label = np.array(label)
        #********* End *********#

    def predict(self, feature):
        '''
        Predict a label for every sample in ``feature``.

        :param feature: test samples, ndarray (one sample per row)
        :return: list holding one predicted label per test sample
        :raises RuntimeError: if fit() has not been called yet
        :raises ValueError: if k is smaller than 1
        '''
        #********* Begin *********#
        if self.train_feature is None or self.train_label is None:
            raise RuntimeError("kNNClassifier.predict() called before fit()")
        # A non-positive k would silently slice the wrong neighbours
        # (k=0 -> nobody votes, k<0 -> drops the farthest points), so reject it.
        # k=None keeps its plain slice meaning: every training sample votes.
        if self.k is not None and self.k < 1:
            raise ValueError("k must be at least 1, got %r" % (self.k,))
        return [self._predict_one(sample) for sample in feature]
        #********* End *********#

    def _predict_one(self, sample):
        '''
        Return the majority label among the k training samples nearest to
        ``sample``; on a tied vote the label that reached the top count
        first (scanning from nearest to farthest) wins.
        '''
        # Euclidean distance from `sample` to every training row in one
        # vectorised pass instead of a Python-level loop over the rows.
        diff = np.asarray(self.train_feature) - sample
        per_sample_axes = tuple(range(1, diff.ndim))
        distances = np.sqrt(np.sum(diff ** 2, axis=per_sample_axes))
        # Stable sort: equidistant neighbours keep their training-set order,
        # which makes the choice of neighbours deterministic.
        nearest = np.argsort(distances, kind="stable")

        votes = {}
        winner, winner_votes = None, 0
        for idx in nearest[:self.k]:
            label = self.train_label[idx]
            votes[label] = votes.get(label, 0) + 1
            # Strict '>' so that a later label merely equalling the best
            # count does not displace the current winner.
            if votes[label] > winner_votes:
                winner, winner_votes = label, votes[label]
        return winner
# 第2关:红酒分类 (Stage 2: wine classification)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
def classification(train_feature, train_label, test_feature):
    '''
    Classify the wine samples in test_feature.

    The features are standardised with statistics learned from the training
    set only, then a KNeighborsClassifier with default settings is fitted on
    the scaled training data and used to label the scaled test data.

    :param train_feature: training samples, ndarray
    :param train_label: training labels, ndarray
    :param test_feature: test samples, ndarray
    :return: predicted labels for test_feature
    '''
    #********* Begin *********#
    # Learn the scaling on the training set and reuse it for the test set,
    # so both are transformed with the same statistics.
    standardizer = StandardScaler().fit(train_feature)
    scaled_train = standardizer.transform(train_feature)
    scaled_test = standardizer.transform(test_feature)

    model = KNeighborsClassifier()
    model.fit(scaled_train, train_label)
    predictions = model.predict(scaled_test)
    return predictions
    #********* End **********#