# take3-决策树

import numpy as np
import matplotlib.pyplot as plt
from sklearn import tree

from sklearn.model_selection import train_test_split

# Classification tree
# NOTE(review): X and y are not created in the visible part of this file;
# they must already exist before this section runs.

# Hold out 20% of the samples for scoring.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# fit() returns the estimator itself, so construction and training are chained.
clf = tree.DecisionTreeClassifier().fit(X_train, y_train)

print("Classifier Score:", clf.score(X_test, y_test))

# The tree that gets drawn is refitted on the complete data set, not just
# on the training split that was scored above.
clf.fit(X, y)
tree.plot_tree(clf)
plt.show()

# Regression tree
# NOTE(review): this section reuses the same X and y as the classification
# section above; confirm that y is a sensible regression target.

# Draw a fresh random 80/20 split for the regressor.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# fit() returns the estimator itself, so construction and training are chained.
clf = tree.DecisionTreeRegressor().fit(X_train, y_train)

print("Regression Score:", clf.score(X_test, y_test))

# The tree that gets drawn is refitted on the complete data set, not just
# on the training split that was scored above.
clf.fit(X, y)
tree.plot_tree(clf)
plt.show()



 def gini(self, labels):
     r"""Compute the Gini impurity of a collection of class labels.

     Gini(p) = \sum_{k=1}^{K} p_k (1 - p_k) = 1 - \sum_{k=1}^{K} p_k^2,
     where p_k is the fraction of samples that belong to class k.

     Parameters
     ----------
     labels : list or np.ndarray
         Class label of every sample. A 2-D input (for example a table
         whose last column holds the labels) is reduced to its last column.

     Returns
     -------
     gini : float
         The impurity: 0.0 when every label is the same, growing towards 1
         as the labels spread evenly over more classes. An empty input has
         no classes, so the sum is empty and the result is 1.0.
     """
     values = np.asarray(labels)
     if values.ndim == 2:
         # Table-like input: the labels live in the last column.
         values = values[:, -1]

     # Number of samples per distinct label.
     _, counts = np.unique(values, return_counts=True)
     proportions = counts / values.size
     return float(1.0 - np.sum(proportions ** 2))


# ©2019 CSDN (blog page footer left over from the export) skin theme: 游动-白, designer: 上身试试