KD树——UCI数据集IRIS

采用基于KD树的K近邻(惰性学习)方法进行分类
测试准确率非常高,在90%以上。实现中曾对数据做预处理,即将四个特征的值都线性映射到[0,100]之间;但是测试时发现做不做预处理得到的准确率相近(下面给出的代码中,预处理语句处于被注释掉的状态)。

推测原因:准确率很高、且是否做预处理对准确率影响不大,这两个现象表明数据的分类应该比较明显。

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <queue>
using namespace std;
// Large sentinel; input() uses it as the starting value when searching for
// per-feature minima.
#define INF 0x3fffffff
// Fill array `s` with byte value `t`. `s` must be a real array (not a pointer),
// because sizeof(s) supplies the length. The macro deliberately has no trailing
// semicolon so that `clr(a, 0);` is exactly one statement and can be used as
// the body of an if/else.
#define clr(s,t) memset(s,t,sizeof(s))
#define TRAIN 120 // number of training samples read by input()
#define TEST 30 // number of test samples read by classification()
#define N (TRAIN+5)
#define Q (TEST+5)
#define K 5 // number of nearest neighbours (the K in K-NN)
#define D 4 // number of features; the iris data set has 4
// Feature index that point::operator< compares on. build_KD() assigns it
// right before each nth_element call.
int idx;
// Per-slot marker for the implicit binary tree (children of r are 2r and 2r+1).
// A value of -1 means "no node stored in this slot"; query() stops there.
int son[N<<2];

// One sample: D feature values plus the integer id of its class label.
struct point{
    double s[D];
    int type;
    // Orders points by the single feature currently selected through `idx`.
    bool operator<(const point &other)const{
        return s[idx] < other.s[idx];
    }
};

// NOTE(review): with `using namespace std;` and a C++17 (or later) compiler the
// global name `data` may be ambiguous with std::data -- confirm the intended
// language standard.
point data[N];      // training samples
point base;         // the sample currently being classified
point kdt[N<<2];    // KD-tree nodes, indexed like `son`
// A candidate neighbour: the training point and its (squared) distance to the
// query point, as computed by dist().
struct node{
    point pp;
    double dis;
    // Ordering by distance; with the default priority_queue this puts the
    // farthest candidate on top.
    bool operator<(const node &rhs)const{
        return dis < rhs.dis;
    }
};

node res[K+5];
// Candidate neighbours of the current query; the top element is the farthest.
priority_queue<node> q;
// Per-feature maximum and minimum over the training set (filled by input()).
double up[D];
double low[D];
// kind[i]: label text read for training sample i.
char kind[N][20];
// str[id]: label text registered under class id `id` by find().
char str[N][20];
// Label text of the test sample currently being classified.
char type[20];
// Highest class id in use; -1 while no label has been registered.
int len;
// Returns the class id of label `x`. If the label has not been seen before it
// is appended to `str`, `len` is advanced, and the new id is returned.
int find(char* x){
    int pos = 0;
    while(pos <= len){
        if(strcmp(x, str[pos]) == 0)
            return pos;
        ++pos;
    }
    // Unknown label: register it in the next free slot.
    len = len + 1;
    strcpy(str[len], x);
    return len;
}
void input(){
    int i,j;
    len = -1;
    for(j = 0;j<4;j++){
        up[j] = 0;
        low[j] = INF;
    }
    for(i = 0;i<TRAIN;i++){
        scanf("%lf,%lf,%lf,%lf,%s\n",&data[i].s[0],&data[i].s[1],&data[i].s[2],&data[i].s[3],kind[i]);
        j = find(kind[i]);                  //标记训练集的类别
        data[i].type = j;                   //存入数据结构,后面多数表决的时候需要用到
        for(j = 0;j<4;j++){                 //为将数据映射到同一个区间,需要用到某特征最大值和最小值
            up[j] = max(up[j],data[i].s[j]);
            low[j] = min(low[j],data[i].s[j]);
        }
    }
    for(i = 0;i<TRAIN;i++)                  //把原始数据化为[0,100]之间
        for(j = 0;j<4;j++);
            //data[i].s[j] = 100*(data[i].s[j]-low[j])/(up[j]-low[j]);
}
// Recursively builds the KD-tree over data[a..b] (inclusive) into slot r.
//   r : tree slot to fill; its children live in slots 2r and 2r+1
//   a : first index of the sample range
//   b : last index of the sample range
//   d : depth of this node; the splitting feature is d % D
// An empty range (a > b) leaves slot r untouched.
void build_KD(int r,int a,int b,int d){
    if(b < a)
        return;
    const int median = (a+b)>>1;
    const int leftSlot = r*2;
    const int rightSlot = r*2+1;

    // Select the feature that point::operator< compares on for this level.
    idx = d%D;
    // Any value other than -1 marks the slot as occupied; query() only tests
    // against -1.
    son[r] = b;
    // Children start out empty; the recursive calls below overwrite the marker
    // for every child that actually receives samples.
    son[leftSlot] = -1;
    son[rightSlot] = -1;

    // Partially sort so that data[median] is the median along feature `idx`,
    // with smaller elements before it and larger ones after it.
    nth_element(data+a, data+median, data+(b+1));
    kdt[r] = data[median];

    build_KD(leftSlot, a, median-1, d+1);
    build_KD(rightSlot, median+1, b, d+1);
}
double dist(point a,point b){
    double sum = 0;
    for(int j = 0;j<D;j++)
        sum += (a.s[j]-b.s[j])*(a.s[j]-b.s[j]);
    return sum;
}
void query(int r,int d){
    int id = d%D,flag = 1;
    if(son[r] == -1)
        return;
    node tmp;
    tmp.pp = kdt[r];
    tmp.dis = dist(base,kdt[r]);
    int x = r*2;
    int y = r*2+1;
    if(base.s[id] > tmp.pp.s[id])
        swap(x, y);
    query(x, d+1);
    if(q.size()<K)
        q.push(tmp);
    else{
        if(q.top().dis > tmp.dis){
            q.pop();
            q.push(tmp);
        }
        if(q.top().dis <= (tmp.pp.s[id]-base.s[id])*(tmp.pp.s[id]-base.s[id]))
            flag = 0;
    }
    if(flag)
        query(y, d+1);
}
int guess(){
    int i,j;
    int flag[N];
    clr(flag, 0);
    for(i = 0;i<K;i++){
        flag[q.top().pp.type]++;            //多数表决
        q.pop();
    }
    for(i = j = 0;i<=len;i++)
        if(flag[i] > flag[j])
            j = i;
    return j;
}
// Reads up to TEST samples of the form "f0,f1,f2,f3,label" from stdin,
// classifies each one with the K-NN search, prints prediction vs. actual
// label, and finally prints the accuracy over the samples evaluated.
// Stops early (with a message on stderr) if a sample cannot be parsed.
void classification(){
    int num = 0;                            // correctly classified samples
    int tested = 0;                         // samples actually evaluated
    for(int i = 0;i<TEST;i++){
        // %19s: `type` holds 20 bytes, so cap the label at 19 chars + '\0'.
        if(scanf("%lf,%lf,%lf,%lf,%19s\n",&base.s[0],&base.s[1],&base.s[2],&base.s[3],type) != 5){
            fprintf(stderr,"classification: failed to parse test sample %d\n",i+1);
            break;
        }
        // Optional preprocessing, intentionally disabled (must match input()):
        //for(int j = 0;j<D;j++)
        //    base.s[j] = 100*(base.s[j]-low[j])/(up[j]-low[j]);
        query(1,0);
        const int j = guess();
        // find() registers `type` as a new class if it was never seen in the
        // training data; such a sample then simply counts as misclassified.
        if(j == find(type))
            num++;
        printf("第%d个我预测为%s,实际为%s\n",i+1,str[j],type);
        tested++;
    }
    printf("正确率为:%lf\n",tested ? (double)num/tested : 0.0);
}
// Entry point: redirects stdin to "4.txt" (training samples followed by test
// samples), builds the KD-tree from the training set and classifies the test
// set. Returns 1 if the input file cannot be opened.
int main(){
    if(freopen("4.txt","r",stdin) == NULL){
        perror("4.txt");
        return 1;
    }
    input();
    build_KD(1,0,TRAIN-1,0);                // root is slot 1, depth 0
    classification();
    return 0;
}

输入(程序从文件 4.txt 读取;前120行为训练集,空行之后的30行为测试集):

5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.2,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.6,1.4,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica




5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica


  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值