AdaTree决策树算法(C5)C#源码

1、界面

基于GLC的地表覆盖分类技术流程

2、精度评价

基于GLC的地表覆盖分类技术流程

3、输出规则集

基于GLC的地表覆盖分类技术流程

4、部分源码

using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
using System.Text.RegularExpressions;

namespace BoostTree
{
    class TreeClass : PublicClass
    {
        /// <summary>
        /// Entry point for growing a single decision tree from a sample buffer.
        /// </summary>
        /// <param name="buffer">Samples; every element is cast to <c>string[]</c>. The last column of the first row fixes the class-label column.</param>
        /// <param name="treeHigh">Maximum number of split levels; a value of 0 selects the default of 7.</param>
        /// <param name="contribute">Per-attribute counters handed to the recursive builder, which increments the entry of each attribute it splits on.</param>
        /// <returns>The root node of the constructed tree.</returns>
        protected Tree TreeRun(ArrayList buffer, int treeHigh, int[] contribute)
        {
            // A requested height of 0 means "use the default depth limit".
            int depthLimit = (treeHigh == 0) ? 7 : treeHigh;

            Tree root = new Tree();

            // Candidate attributes are every column before the class column: 0 .. labelColumn-1.
            ArrayList bandIDlist = new ArrayList();
            int labelColumn = ((String[])buffer[0]).Length - 1;
            int column = 0;
            while (column < labelColumn)
            {
                bandIDlist.Add(column);
                column++;
            }

            // Recursively grow the tree below the root.
            builtTree(buffer, bandIDlist, root, depthLimit, labelColumn, contribute);
            return root;
        }
         /// <summary>
         /// Recursively grows the subtree rooted at <paramref name="fatherTree"/> from the given samples.
         /// A node becomes a leaf when its samples carry at most one class label, when no attribute
         /// yields a non-zero gain ratio, or (for the children) when the depth budget is used up.
         /// </summary>
         /// <param name="buffer">Samples reaching this node; each element is a <c>string[]</c> row.</param>
         /// <param name="bandIDlist">Column indexes of the candidate attributes.</param>
         /// <param name="fatherTree">Node to fill in (either as a leaf or as an internal split node).</param>
         /// <param name="depth">Remaining number of split levels allowed, including this one.</param>
         /// <param name="classID">Column index of the class label.</param>
         /// <param name="contribute">Per-attribute split counters; the entry of the chosen attribute is incremented.</param>
         private void builtTree(ArrayList buffer, ArrayList bandIDlist, Tree fatherTree, int depth, int classID, int[] contribute)
         {
             ArrayList classValue = getclassValue(buffer, classID);

             // Pure (or empty) node: decide this first so the costly discretisation of every
             // attribute below is skipped when it would be thrown away anyway.
             if (classValue.Count <= 1)
             {
                 fillLeafNode(fatherTree, buffer, classValue, classID);
                 return;
             }

             // Best cut point and best gain ratio for every candidate attribute.
             Double[] Ratio = new Double[bandIDlist.Count];
             Double[] bandDiscrete = discrelizeALL(buffer, bandIDlist, Ratio, classID);

             ArrayList bestbandID = findbestbandID(Ratio, bandIDlist, bandIDlist.Count);
             if (bestbandID == null)
             {
                 // No attribute separates the samples any further (best gain ratio is 0).
                 fillLeafNode(fatherTree, buffer, classValue, classID);
                 return;
             }

             depth--;

             int featureNum = (int)bestbandID[0];
             Double attributeValue = bandDiscrete[bandIDlist.IndexOf(featureNum)];

             fatherTree.CutBandID = featureNum;
             fatherTree.DivisionValue = attributeValue;

             // Contribution statistic: count how often each attribute is used for a split.
             contribute[featureNum]++;

             // Partition the samples: values <= cut go left, the rest go right.
             ArrayList leftBuffer = new ArrayList();
             ArrayList rightBuffer = new ArrayList();
             for (int i = 0; i < buffer.Count; i++)
             {
                 if (Convert.ToDouble(((string[])buffer[i])[featureNum]) <= attributeValue)
                 {
                     leftBuffer.Add(buffer[i]);
                 }
                 else
                 {
                     rightBuffer.Add(buffer[i]);
                 }
             }

             Tree leftChild = new Tree();
             Tree rightChild = new Tree();
             if (depth > 0)
             {
                 builtTree(leftBuffer, bandIDlist, leftChild, depth, classID, contribute);
                 builtTree(rightBuffer, bandIDlist, rightChild, depth, classID, contribute);
             }
             else
             {
                 // Depth budget exhausted: both children become leaves. The parent's label
                 // list is reused as the set of candidate classes, as before.
                 fillLeafNode(leftChild, leftBuffer, classValue, classID);
                 fillLeafNode(rightChild, rightBuffer, classValue, classID);
             }

             fatherTree.LeftChild = leftChild;
             fatherTree.LeftChild.Parent = fatherTree;
             fatherTree.RightChild = rightChild;
             fatherTree.RightChild.Parent = fatherTree;
         }

         /// <summary>
         /// Turns <paramref name="node"/> into a leaf: stores the majority class, keeps the samples,
         /// records the sample count as Cover and the result of <c>getMiss</c> as Miss
         /// (presumably the number of samples not matching the predicted class; getMiss is defined elsewhere).
         /// </summary>
         private void fillLeafNode(Tree node, ArrayList samples, ArrayList classValue, int classID)
         {
             node.PreClass = setPreClass(samples, classValue, classID);
             // Keep the samples that ended up in this leaf.
             node.leafSample = samples;
             node.Cover = Convert.ToDouble(samples.Count);
             node.Miss = getMiss(samples, node.PreClass, classID);
         }
         /// <summary>
         /// Discretises every candidate attribute: for each one, obtains its cut point from
         /// <c>discrelize</c> and stores the accompanying best gain ratio in <paramref name="Ratio"/>.
         /// </summary>
         /// <param name="buffer">Samples; each element is a <c>string[]</c> row.</param>
         /// <param name="bandIDlist">Column indexes of the candidate attributes.</param>
         /// <param name="Ratio">Output array, written at the same positions as <paramref name="bandIDlist"/>.</param>
         /// <param name="classID">Column index of the class label.</param>
         /// <returns>The cut point of each attribute, positioned like <paramref name="bandIDlist"/>.</returns>
         private Double[] discrelizeALL(ArrayList buffer, ArrayList bandIDlist, double[] Ratio,int classID)
         {
             Double[] cutPoints = new Double[bandIDlist.Count];

             int slot = 0;
             foreach (object band in bandIDlist)
             {
                 // One-element array used as an out-parameter for the best gain ratio.
                 double[] ratioHolder = { 0.0 };
                 cutPoints[slot] = discrelize(buffer, (int)band, ratioHolder, classID);
                 Ratio[slot] = ratioHolder[0];
                 slot++;
             }

             return cutPoints;
         }
         /// <summary>
         /// Collects the distinct class labels found in the samples, in order of first appearance.
         /// </summary>
         /// <param name="buffer">Samples; each element is a <c>string[]</c> row.</param>
         /// <param name="classID">Column index of the class label.</param>
         /// <returns>An <see cref="ArrayList"/> of distinct label strings.</returns>
         protected ArrayList getclassValue(ArrayList buffer, int classID)
         {
             ArrayList distinctLabels = new ArrayList();

             foreach (object row in buffer)
             {
                 string label = ((string[])row)[classID];
                 // Only labels not seen so far are recorded.
                 if (!distinctLabels.Contains(label))
                 {
                     distinctLabels.Add(label);
                 }
             }

             return distinctLabels;
         }
         /// <summary>
         /// Picks the label for a leaf node: the entry of <paramref name="classValue"/> that occurs
         /// most often in the samples. On a tie the earliest entry wins.
         /// </summary>
         /// <param name="Buffer">Samples; each element is a <c>string[]</c> row.</param>
         /// <param name="classValue">Candidate class labels (strings).</param>
         /// <param name="classID">Column index of the class label.</param>
         /// <returns>The majority label.</returns>
         protected string setPreClass(ArrayList Buffer, ArrayList classValue, int classID)
         {
             int labelCount = classValue.Count;
             double[] tally = new double[labelCount];

             // Count, per candidate label, how many samples carry it.
             foreach (object row in Buffer)
             {
                 for (int c = 0; c < labelCount; c++)
                 {
                     if (((string[])row)[classID] == (string)classValue[c])
                     {
                         tally[c] += 1.0;
                     }
                 }
             }

             // Strict comparison keeps the first label when counts are equal.
             int winner = 0;
             for (int c = 1; c < labelCount; c++)
             {
                 if (tally[c] > tally[winner])
                 {
                     winner = c;
                 }
             }

             return (string)classValue[winner];
         }
         // Shared generator for tie-breaking. A fresh time-seeded Random per call can repeat the
         // same "random" choice on calls made in quick succession.
         private static readonly Random tieBreakRandom = new Random();

         /// <summary>
         /// Finds the attribute with the highest gain ratio. When several attributes share the
         /// highest value, one of them is chosen uniformly at random.
         /// </summary>
         /// <param name="GainRatio">Gain ratio per attribute, positioned like <paramref name="bandIDlist"/>.</param>
         /// <param name="bandIDlist">Column indexes of the candidate attributes.</param>
         /// <param name="Num">Number of leading entries of <paramref name="GainRatio"/> to consider.</param>
         /// <returns>
         /// A one-element list holding the chosen attribute's column index, or <c>null</c> when there
         /// are no candidates or the best gain ratio is exactly 0 (the stop-splitting condition).
         /// </returns>
         private ArrayList findbestbandID(Double[] GainRatio, ArrayList bandIDlist, int Num)
         {
             // Nothing to choose from: report "no split" instead of indexing past the array.
             if (Num <= 0)
             {
                 return null;
             }

             // Positions (within GainRatio) of all attributes sharing the current maximum.
             List<int> tiedPositions = new List<int>();
             tiedPositions.Add(0);
             double bestRatio = GainRatio[0];
             for (int i = 1; i < Num; i++)
             {
                 double ratio = GainRatio[i];
                 if (ratio == bestRatio)
                 {
                     tiedPositions.Add(i);
                 }
                 else if (ratio > bestRatio)
                 {
                     // Strictly better: previous candidates are discarded.
                     tiedPositions.Clear();
                     tiedPositions.Add(i);
                     bestRatio = ratio;
                 }
             }

             // A best gain ratio of 0 means no attribute separates the samples any more.
             if (bestRatio == 0.0)
             {
                 return null;
             }

             // Random.Next(max) excludes max, so passing Count makes every tied candidate
             // (including the last one) eligible. Random is not thread-safe, hence the lock.
             int pick;
             lock (tieBreakRandom)
             {
                 pick = tieBreakRandom.Next(tiedPositions.Count);
             }

             ArrayList bestbandID = new ArrayList();
             bestbandID.Add(bandIDlist[tiedPositions[pick]]);
             return bestbandID;
         }
         /// <summary>
         /// Determines the cut point for one continuous attribute. Candidate cut points are the
         /// midpoints between consecutive distinct values of the attribute; the final choice among
         /// them is delegated to <c>getcutPoint</c>.
         /// </summary>
         /// <param name="buffer">Samples; each element is a <c>string[]</c> row.</param>
         /// <param name="bandID">Column index of the attribute to discretise.</param>
         /// <param name="best">One-element output array passed on to <c>getcutPoint</c>; left untouched when there is no candidate.</param>
         /// <param name="classID">Column index of the class label.</param>
         /// <returns>The selected cut point, or 0.0 when the attribute has no two distinct values.</returns>
         private Double discrelize(ArrayList buffer, int bandID, double[] best, int classID)
         {
             ArrayList labels = getclassValue(buffer, classID);

             // Attribute values of all samples, ascending.
             ArrayList sortedValues = new ArrayList();
             foreach (object row in buffer)
             {
                 sortedValues.Add(Convert.ToDouble(((string[])row)[bandID]));
             }
             sortedValues.Sort();

             // Midpoint of every adjacent pair of different values is a candidate cut.
             ArrayList candidates = new ArrayList();
             for (int next = 1; next < sortedValues.Count; next++)
             {
                 double lower = (double)sortedValues[next - 1];
                 double upper = (double)sortedValues[next];
                 if (lower != upper)
                 {
                     candidates.Add((lower + upper) / 2.0);
                 }
             }
             candidates.Sort();

             // Attributes with a single distinct value cannot be split.
             if (candidates.Count == 0)
             {
                 return 0.0;
             }

             return getcutPoint(buffer, labels, candidates, bandID, best, classID);
         }
         /// <summary>
         /// Evaluates the gain ratio of every candidate cut point and returns the one chosen by
         /// <c>findbestValue</c>.
         /// </summary>
         /// <param name="buffer">Samples; each element is a <c>string[]</c> row.</param>
         /// <param name="classValue">Distinct class labels of the samples.</param>
         /// <param name="bandWait">Candidate cut points.</param>
         /// <param name="bandID">Column index of the attribute being discretised.</param>
         /// <param name="best">One-element output array; receives the largest gain ratio seen (never below 0.0).</param>
         /// <param name="classID">Column index of the class label.</param>
         /// <returns>The chosen cut point.</returns>
         private Double getcutPoint(ArrayList buffer, ArrayList classValue, ArrayList bandWait, int bandID, double[] best,int classID)
         {
             ArrayList ratios = new ArrayList();
             double highest = 0.0;

             foreach (object candidate in bandWait)
             {
                 double cut = Convert.ToDouble(candidate);
                 double ratio = calculateGainRatio(buffer, cut, classValue, bandID, classID);
                 ratios.Add(ratio);

                 // Running maximum, starting from 0.0.
                 if (ratio > highest)
                 {
                     highest = ratio;
                 }
             }

             ArrayList chosen = findbestValue(ratios, bandWait);
             Double chosenCut = Convert.ToDouble(chosen[0]);

             best[0] = highest;
             return chosenCut;
         }
         /// <summary>
         /// Returns the candidate cut point whose gain value is highest; on ties the earliest
         /// candidate wins.
         /// </summary>
         /// <param name="Gain">Gain value (boxed double) per candidate, positioned like <paramref name="bandWait"/>.</param>
         /// <param name="bandWait">Candidate cut points.</param>
         /// <returns>A one-element list: the winning candidate, or 0.0 when the best gain is exactly 0.</returns>
         private ArrayList findbestValue(ArrayList Gain, ArrayList bandWait)
         {
             // Position of the first candidate holding the maximum seen so far.
             int leader = 0;
             double leaderGain = (double)Gain[0];

             for (int i = 1; i < Gain.Count; i++)
             {
                 double current = (double)Gain[i];
                 // Strictly greater only, so an equal later value never displaces the leader.
                 if (current > leaderGain)
                 {
                     leader = i;
                     leaderGain = current;
                 }
             }

             ArrayList result = new ArrayList();
             if (leaderGain == 0.0)
             {
                 // Best gain is 0: report 0.0 instead of a cut point.
                 result.Add(0.0);
             }
             else
             {
                 result.Add(bandWait[leader]);
             }
             return result;
         }
         /// <summary>
         /// Gain ratio of splitting the samples at <paramref name="bandDiscrete"/> on attribute
         /// <paramref name="bandID"/>: information gain divided by split information.
         /// </summary>
         /// <returns>Gain / SplitI, or 0.0 when the split information is 0 (all samples on one side).</returns>
         private double calculateGainRatio(ArrayList buffer, Double bandDiscrete, ArrayList classValue, int bandID,int classID)
         {
             double gain = calculateGain(buffer, classValue, bandDiscrete, bandID, classID);
             double splitInfo = calculateSplitI(buffer, bandDiscrete, bandID);

             // Guard against dividing by zero when the cut does not separate anything.
             return (splitInfo == 0.0) ? 0.0 : gain / splitInfo;
         }
         /// <summary>
         /// Information gain of a binary split: entropy of all samples minus the size-weighted
         /// entropies of the two partitions (values &lt;= cut, values &gt; cut).
         /// </summary>
         /// <param name="buffer">Samples; each element is a <c>string[]</c> row.</param>
         /// <param name="classValue">Distinct class labels.</param>
         /// <param name="attributeValue">Cut point.</param>
         /// <param name="bandID">Column index of the attribute being split.</param>
         /// <param name="classID">Column index of the class label.</param>
         /// <returns>The information gain of the split.</returns>
         private double calculateGain(ArrayList buffer, ArrayList classValue, Double attributeValue, int bandID,int classID)
         {
             double wholeEntropy = calculateES(buffer, classValue, classID);

             // Partition the samples around the cut point.
             ArrayList atOrBelow = new ArrayList();
             ArrayList above = new ArrayList();
             foreach (object row in buffer)
             {
                 double value = Convert.ToDouble(((string[])row)[bandID]);
                 ArrayList side = (value <= attributeValue) ? atOrBelow : above;
                 side.Add(row);
             }

             // Each partition's entropy is weighted by its share of the samples.
             double total = (double)buffer.Count;
             double weightedLow = calculateES(atOrBelow, classValue, classID) * (((double)atOrBelow.Count) / total);
             double weightedHigh = calculateES(above, classValue, classID) * (((double)above.Count) / total);
             double splitEntropy = weightedLow + weightedHigh;

             return wholeEntropy - splitEntropy;
         }
         /// <summary>
         /// Split information of a binary cut: the base-2 entropy of the proportions of samples
         /// falling at-or-below and above <paramref name="attributeValue"/>.
         /// </summary>
         /// <param name="buffer">Samples; each element is a <c>string[]</c> row.</param>
         /// <param name="attributeValue">Cut point.</param>
         /// <param name="index">Column index of the attribute being split.</param>
         /// <returns>The split information; 0 when all samples fall on one side.</returns>
         private double calculateSplitI(ArrayList buffer, Double attributeValue, int index)
         {
             double lowCount = 0.0;
             double highCount = 0.0;

             // Count the samples on each side of the cut.
             foreach (object row in buffer)
             {
                 if (Convert.ToDouble(((string[])row)[index]) <= attributeValue)
                 {
                     lowCount++;
                 }
                 else
                 {
                     highCount++;
                 }
             }

             double total = (double)buffer.Count;
             double log2 = Math.Log(2.0);
             double accumulated = 0.0;
             foreach (double count in new double[] { lowCount, highCount })
             {
                 // An empty side contributes nothing (and would otherwise produce log(0)).
                 if (count != 0)
                 {
                     double share = count / total;
                     accumulated += Math.Log(share) / log2 * share;
                 }
             }

             return -accumulated;
         }
         /// <summary>
         /// Base-2 entropy E(S) of the class distribution of the samples.
         /// </summary>
         /// <param name="buffer">Samples; each element is a <c>string[]</c> row.</param>
         /// <param name="attributeValue">Class labels (strings) to count.</param>
         /// <param name="index">Column index of the class label.</param>
         /// <returns>The entropy; 0 when at most one of the labels occurs.</returns>
         private double calculateES(ArrayList buffer, ArrayList attributeValue, int index)
         {
             int labelCount = attributeValue.Count;
             double[] tally = new double[labelCount];

             // Number of samples per class label.
             foreach (object row in buffer)
             {
                 for (int c = 0; c < labelCount; c++)
                 {
                     if (((string[])row)[index] == ((string)attributeValue[c]))
                     {
                         tally[c]++;
                     }
                 }
             }

             double total = (double)buffer.Count;
             double log2 = Math.Log(2.0);
             double accumulated = 0.0;
             foreach (double count in tally)
             {
                 // Labels that do not occur are skipped to avoid log(0).
                 if (count != 0)
                 {
                     double share = count / total;
                     accumulated += Math.Log(share) / log2 * share;
                 }
             }

             return -accumulated;
         }
        
       
    }
}

5、完整代码:https://download.csdn.net/download/sailingw/14919831

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

海亲王

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值