sklearn中封装了决策树的算法
代码如下:
In [1]: # LOAD PACKAGES
In [2]: from sklearn import tree
In [3]: from pandas import read_table, DataFrame
In [4]: from os import system
In [5]: # IMPORT DATA
In [6]: data = read_table('/home/liuwensui/Documents/data/credit_count.txt', sep = ',')
In [7]: # DEFINE THE RESPONSE
In [8]: Y = data[data.CARDHLDR == 1].BAD
In [9]: # DEFINE PREDICTORS
In [10]: X = data.ix[data.CARDHLDR == 1, "AGE":"EXP_INC"]
In [11]: # SPECIFY TREE CLASSIFIER
In [12]: dtree = tree.DecisionTreeClassifier(criterion = "entropy", min_samples_leaf = 500, compute_importances = True)
In [13]: dtree = dtree.fit(X, Y)
In [14]: # PRINT OUT VARIABLE IMPORTANCE
In [15]: print DataFrame(dtree.feature_importances_, columns = ["Imp"], index = X.columns).sort(['Imp'], ascending = False)
               Imp
INCOME    0.509823
INCPER    0.174509
AGE       0.099996
EXP_INC   0.086134
ACADMOS   0.070118
MINORDRG  0.059420
ADEPCNT   0.000000
MAJORDRG  0.000000
OWNRENT   0.000000
SELFEMPL  0.000000
In [16]: # OUTPUT DOT LANGUAGE SCRIPT
In [17]: dotfile = open("/home/liuwensui/Documents/code/dtree2.dot", 'w')
In [18]: dotfile = tree.export_graphviz(dtree, out_file = dotfile, feature_names = X.columns)
In [19]: dotfile.close()
In [20]: # CALL SYSTEM TO DRAW THE GRAPH
In [21]: system("dot -Tpng /home/liuwensui/Documents/code/dtree2.dot -o /home/liuwensui/Documents/code/dtree2.png")
Out[21]: 0