KNN实现Iris数据分类

       

                

运行结果:

           

完整代码:

#导入K近邻算法所使用的包
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.spatial import distance
import numpy as np
import operator

#Compute Euclidean distances
def my_matEuclidean(row,Matrix):
    """Return the Euclidean distance from `row` to every row of `Matrix`.

    `row` is repeated once per row of `Matrix` (via ``np.tile``), the two
    are subtracted element-wise, and the squared differences are summed
    along axis 1 before taking the square root.

    Args:
        row: A single sample (a 1-D sequence of feature values).
        Matrix: An array exposing ``.shape``; each row is compared to `row`.

    Returns:
        An array with one distance per row of `Matrix`.
    """
    n_rows = Matrix.shape[0]
    # Stack `row` n_rows times so it lines up with Matrix for subtraction.
    repeated = np.tile(row, (n_rows, 1))
    squared_offsets = (repeated - Matrix) ** 2

    return squared_offsets.sum(axis = 1) ** 0.5


#KNN classifier implementation
class MyKNN:
    """Minimal k-nearest-neighbours classifier based on Euclidean distance.

    `fit` only stores the training data; `predict` labels each test row
    by a majority vote among the `n_neighbors` closest training rows.
    Distances are computed with the module-level `my_matEuclidean` helper.
    """

    def __init__(self, n_neighbors):
        """Create a classifier that votes among `n_neighbors` neighbours.

        Raises:
            ValueError: If `n_neighbors` is smaller than 1 (a vote needs at
                least one neighbour).
        """
        if n_neighbors < 1:
            raise ValueError("n_neighbors must be at least 1")
        self.n_neighbors = n_neighbors
        self.X_train     = None
        self.y_train     = None

    def fit(self,X_train,y_train):
        """Remember the training samples and their labels."""
        self.X_train     = X_train
        self.y_train     = y_train

    def predict(self, X_test):
        """Return a list holding one predicted label per row of `X_test`.

        Raises:
            RuntimeError: If called before `fit`.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("fit() must be called before predict()")

        return [self.__closest(row) for row in X_test]

    def __closest(self, row):
        """Return the majority label among the nearest training rows."""
        # Use the data stored by fit(); the previous code read module-level
        # globals named X_train / y_train and ignored the fitted data.
        distances = my_matEuclidean(row, self.X_train)
        order     = distances.argsort()

        votes = {}
        for i in range(self.n_neighbors):
            label        = self.y_train[order[i]]
            votes[label] = votes.get(label, 0) + 1

        # max() returns the first key with the highest count, so ties go to
        # the label that was encountered first (i.e. the closer neighbour).
        return max(votes, key=votes.get)
        
#Load the dataset and evaluate the KNN classifier
iris = datasets.load_iris()
X    = iris.data
Y    = iris.target

#Randomly split the data into a training set and a test set
X_pretrain, X_pretest, y_train, y_test = train_test_split(X, Y,test_size = 0.3)

#Min-max normalisation
minMax  = MinMaxScaler()
X_train = minMax.fit_transform(X_pretrain)
# Only transform the test set: refitting the scaler on it would scale the
# train and test features with different min/max values.
X_test  = minMax.transform(X_pretest)

#Search for the best k, trying k = 30 down to k = 2
best_score = 0.0
best_k     = 0

for k in range(30,1,-1):
    my_classification = MyKNN(k)
    my_classification.fit(X_train, y_train)
    predictions       = my_classification.predict(X_test)
    score             = accuracy_score(y_test, predictions)
    
    # Strict comparison: on equal scores the larger k (seen first) is kept.
    if score > best_score:
        best_score    = score
        best_k        = k
        
        # Reported every time a new best score is found.
        print("Best score is: ",best_score)
        print("Best k is: ",best_k)
        

 

  • 0
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值