机器学习sklearn之knn拟合iris数据集的实现

knn拟合sklearn中的iris数据集

class sklearn.neighbors.KNeighborsClassifier(n_neighbors=5, weights=’uniform’, algorithm=’auto’, leaf_size=30, p=2, metric=’minkowski’, metric_params=None, n_jobs=1, **kwargs) 
  1. n_neighbors:选取几个邻居

  2. weights:邻居的权重是一律相等(‘uniform’),还是距离越近话语权越大(‘distance’)

  3. algorithm:后面再展开

        ‘ball_tree’ will use BallTree 
        ‘kd_tree’ will use KDTree 
        ‘brute’ will use a brute-force search. 
        ‘auto’默认选项,根据数据自动选择最合适的算法(哪个好用就用哪个)
    
  4. leaf_size:只有ball_tree和kd_tree才有必要

  5. p与metric:距离表示,p=1是manhattan_distance,p=2是euclidean_distance。

    from sklearn import neighbors
    from sklearn import datasets
    # Import the k-nearest-neighbours module and the datasets module from scikit-learn.
    knn=neighbors.KNeighborsClassifier()
    
    iris=datasets.load_iris()
    # Load the iris dataset that ships with scikit-learn.
    #print(iris)
    
    knn.fit(iris.data,iris.target)
    # Fit the classifier on the iris feature matrix and its class labels.
    
    predictedlabel =knn.predict([[0.1,0.2,0.3,0.4]])
    print(predictedlabel)# Predict the class of one four-feature sample and print it.
    
    
    import csv
    import random
    import math
    import operator
     
    #导入数据,并分为训练集和测试集
    def loadDataset(filename, split, trainingSet=None, testSet=None):
        """Load a CSV dataset and randomly split its rows into two lists.

        The first four columns of every row are converted to ``float``; any
        remaining columns are left as the strings read from the file. Each
        row is appended to ``trainingSet`` when ``random.random() < split``
        and to ``testSet`` otherwise.

        Args:
            filename: Path of the CSV file to read.
            split: Threshold compared against ``random.random()``; the
                fraction of rows expected to land in the training set.
            trainingSet: List extended in place with training rows. A new
                list is created when omitted.
            testSet: List extended in place with test rows. A new list is
                created when omitted.

        Returns:
            The ``(trainingSet, testSet)`` tuple, so the split is reachable
            even when the lists were not supplied by the caller.

        Raises:
            ValueError: If one of the first four columns is not numeric.
            IndexError: If a non-empty row has fewer than four columns.
        """
        # Create fresh lists per call; mutable defaults would be shared
        # between calls and accumulate rows from earlier invocations.
        if trainingSet is None:
            trainingSet = []
        if testSet is None:
            testSet = []
        # newline='' lets the csv module handle line endings itself.
        with open(filename, 'rt', newline='') as csvfile:
            for row in csv.reader(csvfile):
                # Skip blank lines (e.g. a trailing one) explicitly instead of
                # unconditionally dropping the last row of the file.
                if not row:
                    continue
                for y in range(4):
                    row[y] = float(row[y])
                if random.random() < split:
                    trainingSet.append(row)
                else:
                    testSet.append(row)
        return trainingSet, testSet
    #求欧氏距离(欧几里得距离)
    def euclideanDistance(instance1, instance2, length):
        """Return the Euclidean distance over the first ``length`` components.

        Args:
            instance1: Indexable sequence of numbers.
            instance2: Indexable sequence of numbers.
            length: Number of leading components to compare; anything past
                that index in either sequence is ignored.

        Returns:
            The square root of the summed squared component differences.
        """
        squared_sum = 0
        for idx in range(length):
            delta = instance1[idx] - instance2[idx]
            squared_sum += delta ** 2
        return math.sqrt(squared_sum)
    #计算最近邻(K个数据集),testInstance是实例
    def getNeighbors(trainingSet, testInstance, k):
        """Return the ``k`` training rows closest to ``testInstance``.

        Distances are computed with ``euclideanDistance`` over every
        component except the last one, which is excluded as the class label.

        Args:
            trainingSet: Iterable of rows laid out like ``testInstance``.
            testInstance: Row whose neighbours are wanted; its last element
                is not used in the distance.
            k: Number of neighbours to return.

        Returns:
            A list of at most ``k`` rows from ``trainingSet``, ordered from
            nearest to farthest. Fewer rows are returned when the training
            set holds fewer than ``k`` rows; an empty list when ``k <= 0``.
        """
        # The last component is the label, so leave it out of the distance.
        length = len(testInstance) - 1
        distances = [
            (row, euclideanDistance(testInstance, row, length))
            for row in trainingSet
        ]
        # Sort the (row, distance) pairs by distance, nearest first.
        distances.sort(key=operator.itemgetter(1))
        # Return after ranking *all* rows. The previous version returned from
        # inside the collection loop and so only ever yielded one neighbour.
        # Slicing also copes with k larger than the training set.
        return [row for row, _dist in distances[:max(k, 0)]]
    #投票法找出最近邻的结果哪种最多
    def getResponse(neighbors):
        """Return the majority class label among ``neighbors``.

        Args:
            neighbors: Sequence of rows whose last element is the class label.

        Returns:
            The label with the most votes. On a tie, the label that was
            encountered first wins, because the sort is stable.

        Raises:
            IndexError: If ``neighbors`` is empty.
        """
        tally = {}  # label -> number of votes
        for neighbor in neighbors:
            label = neighbor[-1]
            tally[label] = tally.get(label, 0) + 1
        ranked = sorted(tally.items(), key=operator.itemgetter(1), reverse=True)
        winner, _votes = ranked[0]
        return winner
    #求出精确性
    def getAccuracy(testSet, predictions):
        """Return the percentage of rows whose label matches its prediction.

        Args:
            testSet: Sequence of rows whose last element is the true label.
            predictions: Sequence of predicted labels, index-aligned with
                ``testSet``.

        Returns:
            Accuracy as a float in the range 0.0-100.0. An empty ``testSet``
            yields 0.0 instead of raising ``ZeroDivisionError``; a random
            split can leave the test set empty.

        Raises:
            IndexError: If ``predictions`` is shorter than ``testSet``.
        """
        if not testSet:
            return 0.0
        correct = sum(
            1 for idx, row in enumerate(testSet) if row[-1] == predictions[idx]
        )
        return (correct / float(len(testSet))) * 100.0
    def main():
        """Run the hand-written k-NN classifier on ``irisdata.txt``.

        Splits the file into training and test rows, predicts a label for
        each test row from its 3 nearest training rows, prints every
        prediction next to the actual label, and finally prints the accuracy.
        """
        # prepare data
        split = 0.8
        trainingSet, testSet = [], []
        loadDataset('irisdata.txt', split, trainingSet, testSet)
        print('Train set: '+ repr(len(trainingSet)))
        print('Test set: ' + repr(len(testSet)))
        # generate predictions
        k = 3
        predictions = []
        for sample in testSet:
            nearest = getNeighbors(trainingSet, sample, k)
            label = getResponse(nearest)
            predictions.append(label)
            print ('>predicted=' + repr(label) + ', actual=' + repr(sample[-1]))
        accuracy = getAccuracy(testSet, predictions)
        print('Accuracy: ' + repr(accuracy) + '%')
     
    # Run the demo only when this file is executed as a script, not on import.
    if __name__ == '__main__':
        main()
    
  • 2
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值