KNN实例

from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn import preprocessing

# Maps an integer class label (as predicted by the classifier in the script
# section below) to the text printed for the user.
dictionary = {
    1: "no",
    2: "yes",
    3: "ok",
}

def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and a label list.

    Every non-blank line contributes one record: its first three
    tab-separated fields are the features and its last field is the integer
    class label. The fields of the first record and their count are printed
    as a quick sanity check.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)``. ``returnMat`` is a float
        array of shape ``(records, 3)`` and ``classLabelVector`` is a list of
        ``records`` ints. A file with no records yields a ``(0, 3)`` array and
        an empty list.

    Raises:
        ValueError: If a feature field is not a number or a label field is
            not an integer.
    """
    # The context manager closes the handle even if reading fails.
    with open(filename, "r") as file:
        strippedLines = [line.strip() for line in file]

    # Blank lines (typically a trailing newline at the end of the file) carry
    # no record and would otherwise fail the numeric conversion below.
    records = [line.split("\t") for line in strippedLines if line]
    if not records:
        return np.zeros(shape=(0, 3)), []

    print(records[0])
    print("element in every line is %d" % (len(records[0])))

    returnMat = np.zeros(shape=(len(records), 3))
    classLabelVector = []
    for index, listFromLine in enumerate(records):
        returnMat[index, :] = [float(field) for field in listFromLine[0:3]]
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector

def draw_picture(datingDatMat, datingLabels):
    """Display a scatter plot of feature column 1 against feature column 2.

    Both the marker size and the colour value of each point are 15 times the
    point's label, so different labels are drawn with different sizes and
    colours. The call blocks in ``plt.show()`` until the window is closed.

    Args:
        datingDatMat: 2-D array of features; columns 1 and 2 are plotted.
        datingLabels: Sequence of numeric labels, one per row.
    """
    label_scale = 15.0 * np.array(datingLabels)
    axes = plt.figure().add_subplot(111)
    axes.scatter(
        datingDatMat[:, 1],
        datingDatMat[:, 2],
        s=label_scale,
        c=label_scale,
    )
    plt.show()

def autoNorm(dataSet):
    """Min-max normalise every column of *dataSet* to the range [0, 1].

    Args:
        dataSet: 2-D NumPy array; each column is rescaled independently.

    Returns:
        A tuple ``(normDataSet, ranges, minvals)``: the rescaled array, the
        per-column ``max - min`` and the per-column minimum. ``ranges`` is
        returned unmodified, so it is 0 for a constant column.
    """
    minvals = dataSet.min(0)
    maxvals = dataSet.max(0)
    ranges = maxvals - minvals
    # A constant column has range 0; divide by 1 there so the column maps to
    # 0 instead of NaN (0 / 0).
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataSet - minvals) / safeRanges
    return normDataSet, ranges, minvals



if __name__ == "__main__":
    # Script entry point: load the data set, compare k-NN classifiers for
    # k = 1..9 on a hold-out split, then classify one sample typed in by the
    # user with the best-scoring classifier.
    datingDatMat, datingLabels = file2matrix("./datingTestSet2.txt")

    # Standardise every feature to zero mean and unit variance. The scaler is
    # fitted once and reused for the user's sample so that both go through
    # exactly the same transformation.
    scaler = preprocessing.StandardScaler().fit(datingDatMat)
    x_scale = scaler.transform(datingDatMat)

    # The first 10% of the rows are held out for testing; the rest is used
    # for training. The split does not depend on k, so it is computed once.
    hoRatio = 0.1
    llen = int(len(datingDatMat) * hoRatio)
    trainingSet = x_scale[llen:]
    trainingLabel = datingLabels[llen:]
    testSet = x_scale[:llen]
    testLabel = np.array(datingLabels[:llen])

    KNN = None
    bestErrorRate = None
    for n_neighbor in range(1, 10):
        candidate = KNeighborsClassifier(n_neighbors=n_neighbor)
        candidate.fit(trainingSet, trainingLabel)
        if llen == 0:
            # Too few rows for a hold-out set: nothing to compare, so keep
            # the most recently trained model.
            KNN = candidate
            continue
        errorCount = int(np.sum(candidate.predict(testSet) != testLabel))
        errorRate = errorCount / llen
        print("n_neighbors=%d, hold-out error rate: %f" % (n_neighbor, errorRate))
        # Strict comparison keeps the smallest k among equally good models.
        if bestErrorRate is None or errorRate < bestErrorRate:
            KNN = candidate
            bestErrorRate = errorRate

    percentTats = float(input("1: "))
    ffMiles = float(input("2: "))
    ice = float(input("3: "))
    # The value from prompt "2: " goes into feature column 0 and the value
    # from prompt "1: " into column 1.
    # NOTE(review): presumably this matches the column order of the data
    # file - confirm against the data set.
    inArr = np.array([ffMiles, percentTats, ice])
    inArr = scaler.transform(inArr.reshape(1, -1))
    value = KNN.predict(inArr)
    print(value)
    print(type(value[0]))
    # predict() returns a 1-element array; index it explicitly because
    # converting a non-0-d array with int() is deprecated in recent NumPy.
    print(dictionary[int(value[0])])

数据 点击打开链接
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值