运行此代码并通过 graphviz 生成树的图像后,我们可以观察到树中每个节点上都有 value 数据。

import pandas as pd
from sklearn.tree import DecisionTreeClassifier, export_graphviz
# Load the cleaned training split of the census income data.
dta = pd.read_csv("data/adult.data.cleaned.csv")

# NOTE(review): `test` is used below but is never defined in the visible
# snippet; it must be loaded before this point (presumably the matching
# held-out split -- confirm the path with the author).

# Replace the "?" placeholder in string columns with an explicit "Other" level.
for col in dta:
    if dta[col].dtype.kind != "O":
        continue
    # Literal match (regex=False) avoids the invalid "\?" escape sequence;
    # na=False keeps the mask strictly boolean when a cell is missing.
    train_mask = dta[col].str.contains("?", regex=False, na=False)
    if train_mask.any():
        # `.ix` was removed from pandas; `.loc` is the label/boolean indexer.
        dta.loc[train_mask, col] = "Other"
        test_mask = test[col].str.contains("?", regex=False, na=False)
        test.loc[test_mask, col] = "Other"

# Encode the target as 0/1. Assign the result back instead of calling
# `replace(..., inplace=True)` on an attribute access, which is not guaranteed
# to modify the parent frame.
income_codes = {"<=50K": 0, ">50K": 1}
dta["income"] = dta["income"].replace(income_codes)
test["income"] = test["income"].replace(income_codes)

y = dta.pop("income")
y_test = test.pop("income")

# One-hot encode the remaining categorical features.
X_train = pd.get_dummies(dta)
X_test = pd.get_dummies(test)
# Give the test matrix exactly the training columns, in the same order:
# levels missing from the test split become all-zero columns, and levels the
# model never saw are dropped.
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

dtree = DecisionTreeClassifier(criterion="entropy", random_state=0, max_depth=6)
dtree.fit(X_train, y)

# Write the tree to "tree.dot" explicitly: with out_file left unset, newer
# scikit-learn versions return the DOT source as a string, which would be
# discarded here and leave nothing for graphviz to render.
export_graphviz(dtree, out_file="tree.dot", feature_names=list(X_train.columns))
每个节点上的 value 属性代表什么?
编辑(EDIT):我指的是每个节点中都有的 value=[x, y] 属性。