1 ID3算法流程
理论知识不必多讲,下面讲解ID3算法的工作流程,以西瓜书为例:
2 算法代码讲解
2.1 前提必知
2.2 算法流程
3 代码
整个算法的核心实际上只有 makeTree 和 distributionForInstance 两个函数,下面把代码附上来。想要执行的话请下载 weka 平台,里面包含所依赖的类。
package weka.classifiers.trees;
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Id3.java
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
*
*/
import weka.classifiers.AbstractClassifier;
import weka.classifiers.Sourcable;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.NoSupportForMissingValuesException;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import java.util.Enumeration;
/**
<!-- globalinfo-start -->
* Class for constructing an unpruned decision tree based on the ID3 algorithm. Can only deal with nominal attributes. No missing values allowed. Empty leaves may result in unclassified instances. For more information see: <br/>
* <br/>
* R. Quinlan (1986). Induction of decision trees. Machine Learning. 1(1):81-106.
* <p/>
<!-- globalinfo-end -->
*
<!-- technical-bibtex-start -->
* BibTeX:
* <pre>
* @article{Quinlan1986,
* author = {R. Quinlan},
* journal = {Machine Learning},
* number = {1},
* pages = {81-106},
* title = {Induction of decision trees},
* volume = {1},
* year = {1986}
* }
* </pre>
* <p/>
<!-- technical-bibtex-end -->
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -D
* If set, classifier is run in debug mode and
* may output additional info to the console</pre>
*
<!-- options-end -->
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @version $Revision: 8109 $
*/
public class myID3
extends AbstractClassifier
implements TechnicalInformationHandler, Sourcable,Drawable{
/** for serialization */
static final long serialVersionUID = -2693678647096322561L;
/** the node's id */
private int m_id;
/** static count to assign the ids */
private static int ms_count = 0;
/** The node's successors. */
private myID3[] m_Successors;
/** Attribute used for splitting. */
private Attribute m_Attribute;
/** Class value if node is lea