The kNN Algorithm

The core of the algorithm comes down to three choices (the ones made in the code below are summarized right after this list):
1. The value of k
2. The distance metric
3. The classification decision rule
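In the implementation below, these choices are made as follows: the distance metric is the Euclidean distance d(x, y) = sqrt( Σ_j (x_j − y_j)² ), k is fixed at 5, and the decision rule is a simple majority vote over the k nearest training samples.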
A C++ implementation is given below.

#include "stdafx.h"
#include <iostream>
#include <vector>
#include <algorithm>
#include <fstream>
using namespace std;

class sample {
public:
    vector<double> X;   // feature vector
    int label;          // true class label read from the data file
    double dis;         // distance to the current query point
    int result;         // predicted class label
};

// Order samples by ascending distance to the query point
bool cmp(const sample &s1, const sample &s2) {
    return s1.dis < s2.dis;
}

// Compute the distance from newx to every training sample and copy the
// k nearest ones (k = nearestsample.size()) into nearestsample.
void knn(const sample &newx, vector<sample> &traindata, vector<sample> &nearestsample) {
    int m = traindata.size();
    int n = traindata[0].X.size();
    for (int i = 0; i < m; i++) {
        // Euclidean distance as the distance metric
        double distance = 0;
        for (int j = 0; j < n; j++) {
            distance += (newx.X[j] - traindata[i].X[j]) * (newx.X[j] - traindata[i].X[j]);
        }
        traindata[i].dis = sqrt(distance);
    }
    sort(traindata.begin(), traindata.end(), cmp);
    int k = nearestsample.size();
    for (int i = 0; i < k; i++) {
        nearestsample[i] = traindata[i];
    }
}

// Return the index of the largest element in a[0..n-1]
int argmax(int *a, int n) {
    int maximum = a[0];
    int maxindex = 0;
    for (int i = 1; i < n; i++) {
        if (a[i] > maximum) {
            maximum = a[i];
            maxindex = i;
        }
    }
    return maxindex;
}

int main() {
    ifstream indata;
    vector<sample> traindata, testdata;
    sample rowdata;
    double temp;
    int fea = 4;                        // number of feature columns; the last column is the label
    indata.open("D://machineLearning/traindata.txt");
    // Read the training set: one sample per line, fea features followed by an integer label
    while (indata >> temp) {
        rowdata.X.clear();
        rowdata.X.push_back(temp);
        for (int i = 1; i < fea; i++) {
            indata >> temp;
            rowdata.X.push_back(temp);
        }
        indata >> temp;
        rowdata.label = (int)temp;
        traindata.push_back(rowdata);
    }
    indata.close();
    indata.open("D://machineLearning/testdata.txt");
    // Read the test set in the same format
    while (indata >> temp) {
        rowdata.X.clear();
        rowdata.X.push_back(temp);
        for (int i = 1; i < fea; i++) {
            indata >> temp;
            rowdata.X.push_back(temp);
        }
        indata >> temp;
        rowdata.label = (int)temp;
        testdata.push_back(rowdata);
    }
    indata.close();

    int N = testdata.size();
    int k = 5;                          // number of nearest neighbours used in the vote
    vector<sample> nearestsample(k);
    int label[3] = { 0 };               // vote counts for classes 0, 1 and 2
    int resultlabel[3] = { 0, 1, 2 };
    for (int i = 0; i < N; i++) {
        knn(testdata[i], traindata, nearestsample);
        label[0] = label[1] = label[2] = 0;
        // Classification decision: a simple majority vote among the k neighbours
        for (int j = 0; j < k; j++) {
            if (nearestsample[j].label == 0)
                label[0]++;
            else if (nearestsample[j].label == 1)
                label[1]++;
            else
                label[2]++;
        }
        testdata[i].result = resultlabel[argmax(label, 3)];
    }
    // Print each test sample: features, true label, predicted label
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < fea; j++) {
            cout << testdata[i].X[j] << " ";
        }
        cout << testdata[i].label << " ";
        cout << testdata[i].result << " ";
        cout << endl;
    }
    getchar();  // keep the console window open until a key is pressed
    return 0;
}
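The program expects D://machineLearning/traindata.txt and D://machineLearning/testdata.txt to contain one sample per line: fea = 4 whitespace-separated feature values followed by an integer class label in {0, 1, 2}. A hypothetical Iris-style row, shown only to illustrate the format:

5.1 3.5 1.4 0.2 0

Each test row is printed back out with its true label and the predicted label appended, so correct predictions are the rows where the last two numbers match.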

Advantages: simple to implement; insensitive to outliers.
Disadvantages: computationally expensive (every prediction scans the entire training set) and memory-hungry (all training samples must be kept).
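A quick note on the cost: knn() above recomputes every distance and then fully sorts all m training samples for each query, which costs O(m log m) per prediction on top of the O(m·n) distance pass. A minimal sketch of a cheaper variant (assuming the sample class, cmp comparator, and headers from the listing above; knnPartial is a hypothetical name, not part of the original post) replaces the full sort with std::nth_element, which moves the k nearest samples to the front in expected linear time:

void knnPartial(const sample &newx, vector<sample> &traindata, vector<sample> &nearestsample) {
    int m = traindata.size();
    int n = traindata[0].X.size();
    for (int i = 0; i < m; i++) {
        // Euclidean distance to the query point, as in knn()
        double distance = 0;
        for (int j = 0; j < n; j++)
            distance += (newx.X[j] - traindata[i].X[j]) * (newx.X[j] - traindata[i].X[j]);
        traindata[i].dis = sqrt(distance);
    }
    int k = nearestsample.size();
    // Partition so the k smallest distances occupy the first k slots
    // (in no particular order) without sorting the remaining m - k samples.
    nth_element(traindata.begin(), traindata.begin() + k, traindata.end(), cmp);
    for (int i = 0; i < k; i++)
        nearestsample[i] = traindata[i];
}

For a small data set like the one used here the difference is negligible, but it shows why the brute-force approach is considered costly as the training set grows.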

Reference:
http://blog.csdn.net/mimi9919/article/details/51172095
