创建二分类数据集,并进行简单的决策树分类
代码示例:
import numpy as np
from pandas import DataFrame
import matplotlib.pyplot as plt
from scipy.stats import norm
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
# Build a synthetic two-class data set whose classes are separated by the
# curve y1 = x**2, train a small decision tree on it, and plot the predictions.
x = np.linspace(0, 1, 100)
# Random vertical offset of each sample from the curve (standard normal).
z = norm.rvs(loc=0, size=100, scale=1)
y1 = x**2
y = y1 + z

# Label is 1.0 when the sample lies on or above the curve (z >= 0), else 0.0.
# The labels are computed in a single vectorized step rather than through
# chained indexing (df["class"][mask] = ...): pandas may apply a chained
# assignment to a temporary copy, leaving the column holding the raw offsets.
df = DataFrame({"x": x, "y": y, "class": np.where(z >= 0, 1.0, 0.0)})
xx = df[["x", "y"]]
yy = df["class"]

# Split into training and test sets (30% held out for testing).
X_train, X_test, y_train, y_test = train_test_split(xx, yy, test_size=0.3)

# Fit a decision tree limited to 10 leaves and predict the held-out samples.
cf = DecisionTreeClassifier(max_leaf_nodes=10)
cf.fit(X_train, y_train)
# X_test is already a DataFrame slice, so its columns can be read directly.
xxx = X_test["x"]
yyy = X_test["y"]
zzz = cf.predict(X_test)

# Mean accuracy on the test set. The parenthesized single-argument form
# works as a function call on Python 3 and prints the same value on Python 2.
print(cf.score(X_test, y_test))

# Draw the true separating curve, then the test points colored by their
# predicted class.
plt.plot(x, y1)
plt.scatter(xxx, yyy, c=zzz)
plt.show()
分割效果图: