下面给出决策树的一些知识
#include <stdio.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <vector>
#include <math.h>
#include <conio.h>
/* Upper bound used for fixed-size arrays in this file, e.g. the number of
   child-pointer slots in each Tree node. */
#define maxn 15
/* Information-gain threshold: build_tree() stops splitting a node when the
   best available gain is below this value. */
#define EPS 0.3
/*
 * One sample of the data set.
 * build_tree() treats flag[4] as the class label and the remaining columns
 * as candidate split attributes.
 */
struct st
{
    int flag[5];
};

/* Training samples, filled in by init(). */
st stu[15];
/* Scratch buffer: build_tree() copies sample subsets here before calling cal_h(). */
st next1[15];
/* Second scratch buffer; not referenced by the code visible in this part of the file. */
st next2[15];
/* si[k] is the number of distinct values column k can take; build_tree()
   enumerates the values 0 .. si[k]-1 for that column. Filled in by init(). */
int si[5];
/*
 * A node of the decision tree built by build_tree().
 */
struct Tree
{
    /* Children; when the node is split on column c, t[v] receives the
       samples whose value in column c equals v. Unused slots are NULL
       once build_tree() has processed the node. */
    Tree *t[maxn];
    /* Indices (into stu) of the samples that reached this node. */
    std::vector<int> ve1;
    /* Column indices still available as split candidates at this node. */
    std::vector<int> ve2;
    /* Class label assigned to this node by build_tree(). */
    int flag;
    /* Column this node is split on; only meaningful for nodes that were split. */
    int c;
};

/* Global node instance; not referenced by the code visible in this part of the file. */
Tree tree;
/*
 * Shannon entropy (in bits) of column t over the samples s[0 .. len-1].
 *
 * s   - array of samples; only s[i].flag[t] is read.
 * len - number of samples to consider; for len <= 0 the result is 0.
 * t   - column index into st::flag (the caller must pass a valid index).
 *
 * Returns -sum(p * log2(p)) over the distinct values of the column, where p
 * is the relative frequency of each value. Values may be any int: they are
 * counted by value rather than used as array indices, so no particular value
 * range is required.
 */
double cal_h(st s[],int len,int t)
{
    /* Distinct values in order of first appearance, with their counts.
       The number of distinct categories is small, so a linear search is
       adequate and keeps the summation order stable. */
    std::vector<int> values;
    std::vector<int> counts;
    for (int i = 0; i < len; i++)
    {
        const int v = s[i].flag[t];
        size_t k = 0;
        while (k < values.size() && values[k] != v)
        {
            k++;
        }
        if (k == values.size())
        {
            values.push_back(v);
            counts.push_back(1);
        }
        else
        {
            counts[k]++;
        }
    }

    /* Subtracting term by term yields +0.0 (not -0.0) for an empty or
       single-valued column. */
    double entropy = 0;
    for (size_t k = 0; k < counts.size(); k++)
    {
        const double p = 1.0 * counts[k] / len;
        entropy -= p * (log(p) / log(2.0));
    }
    return entropy;
}
void build_tree(Tree *root,int f_flag)
{
for(int i = 0;i<root->ve1.size();i++)
{
printf("%d ",root->ve1[i]);
}
printf("\n");
for(int i = 0;i<root->ve2.size();i++)
{
printf("%d ",root->ve2[i]);
}
printf("\n");
for(int i = 0;i<maxn;i++)
{
root->t[i] = NULL;
}
int vis[2]={0};
if(root->ve1.size()==0)
{
root->flag = f_flag;
return;
}
for(int i = 0;i<root->ve1.size();i++)
{
int temp = root->ve1[i];
vis[stu[temp].flag[4]]++;
}
if(vis[0]>=vis[1])
{
root->flag = 0;
}
else
{
root->flag = 1;
}
if(root->ve2.size()==0)
{
return;
}
double h;
for(int i= 0;i<root->ve1.size();i++)
{
next1[i] = stu[root->ve1[i]];
}
h = cal_h(next1,root->ve1.size(),4);
double ha[maxn];
for(int i = 0;i<root->ve2.size();i++)
{
int temp = root->ve2[i];
ha[temp] = 0;
if(temp==4) continue;
for(int j = 0;j<si[temp];j++)
{
int sum = 0;
for(int k = 0;k<root->ve1.size();k++)
{
int ss = root->ve1[k];
if(stu[ss].flag[temp]==j)
{
next1[sum] = stu[ss];
sum++;
}
}
ha[temp] += 1.0*sum/root->ve1.size()*cal_h(next1,sum,4);
}
ha[temp] = h-ha[temp];
}
int index;
double maxnum = -111111111;
for(int i = 0;i<root->ve2.size();i++)
{
int temp = root->ve2[i];
if(ha[temp]>maxnum) maxnum = ha[temp],index = temp;
}
if(ha[index]<EPS) return;
root->c = index;
for(int i = 0;i<si[index];i++)
{
root->t[i] = new Tree;
for(int j = 0;j<root->ve1.size();j++)
{
int a = root->ve1[j];
if(stu[a].flag[index]==i)
{
root->t[i]->ve1.push_back(a);
}
}
for(int j = 0;j<root->ve2.size();j++)
{
if(root->ve2[j]!=index)
{
root->t[i]->ve2.push_back(root->ve2[j]);
}
}
}
for(int i =0;i<si[index];i++)
{
build_tree(root->t[i],root->flag);
}
}
void init()
{
for(int i = 0;i<maxn;i++)
{
for(int j = 0;j<=4;j++)
{
scanf("%d",&stu[i].flag[j]);
}
}
for(int i = 0;i<=4;i++)
{
scanf("%d",&si[i]);
}
}
int main()
{
Tree *root = (Tree*)malloc(sizeof(Tree));
for(int i=0;i<15;i++)
{
root->ve1.push_back(i);
}
for(int i = 0;i<=4;i++)
{
root->ve2.push_back(i);
}
init();
build_tree(root,1);
return 0;
}
下面给出基于增益的代码，数据为李航《统计学习方法》中的数据。