3.2 求得的最近邻点是(2,3)
3.3 k近邻算法如下:
(思路:每求得一个最近邻后,从kd树中删除这个结点,再对剩下的树重新搜索最近邻;如此重复k次,即得到k个近邻)
#include <iostream>
#include <algorithm>
#include <stack>
#include <math.h>
using namespace std;
/*Purpose of this program: build a 2-d tree (a kd-tree with k = 2) from the input training data.
The input is exm_set, which contains a list of (x, y) tuples.
The output is a pointer to the 2-d tree.*/
/* A single two-dimensional sample point. Both coordinates default to zero.
   NOTE(review): because of the file-level `using namespace std;`, the
   unqualified name `data` may be ambiguous with std::data when the file is
   built as C++17 or later -- confirm the target language standard. */
struct data
{
    double x{0.0};  // coordinate on the x dimension
    double y{0.0};  // coordinate on the y dimension
};
/* One node of the 2-d tree.
   Every member now has a default value, so a freshly constructed node is a
   well-defined leaf (both child pointers null) instead of carrying
   indeterminate pointers that a traversal could dereference. */
struct Tnode
{
    struct data dom_elt;             // sample point stored at this node (elaborated
                                     // `struct data` keeps the type name unambiguous)
    int split = 0;                   // splitting dimension for this node -- presumably the
                                     // value produced by ChooseSplit; confirm against the builder
    struct Tnode * left = nullptr;   // left child, null until one is attached
    struct Tnode * right = nullptr;  // right child, null until one is attached
};
/* Strict "less than" on the x coordinate: true only when a.x is smaller
   than b.x. Presumably used as an ordering predicate when sorting samples
   along the x dimension -- confirm at the call site. */
bool cmp1(data a, data b){
    return b.x > a.x;
}
/* Strict "less than" on the y coordinate: true only when a.y is smaller
   than b.y. Presumably used as an ordering predicate when sorting samples
   along the y dimension -- confirm at the call site. */
bool cmp2(data a, data b){
    return b.y > a.y;
}
/* Exact equality of two sample points: true only when both the x and the y
   coordinates compare equal with ==. No tolerance is applied. */
bool equal(data a, data b){
    const bool same_x = (a.x == b.x);
    const bool same_y = (a.y == b.y);
    return same_x && same_y;
}
void ChooseSplit(data exm_set[], int size, int &split, data &SplitChoice){
/*compute the variance on every dimension. Set split as the dismension that have the biggest
variance. Then choose the instance which is the median on this split dimension.*/
/*compute variance on the x,y dimension. DX=EX^2-(EX)^2*/
double tmp1,tmp2;
tmp1 = tmp2 = 0;
for (int i = 0; i < size; ++i)
{
tmp1 += 1.0 / (double)size * exm_set[i].x * exm_set[i].x;
tmp2 += 1.0 / (double)size * exm_set[i].x;
}
double v1 = tmp1 - tmp2 * tmp2; //compute variance on the x dimension
tmp1 = tmp2 = 0;
for (int i = 0; i < size; ++i)
{
tmp1 += 1.0 / (double)size * exm_set[i].y * exm_set[i].y;
tmp2 += 1.0 / (double)size * exm_set[i].y;
}
double v2 = tmp1 - tmp2 * tmp2; //compute variance