#分类
#导包
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
# Global matplotlib settings, applied once before any figure is created.
plt.rcParams.update({
    # Typeface used for all figure text; "KaiTi" must be installed on the
    # machine for this setting to take effect.
    'font.family': "KaiTi",
    # Render the minus sign as an ASCII hyphen instead of the Unicode minus
    # (U+2212) -- presumably because the chosen font lacks that glyph.
    'axes.unicode_minus': False,
})
# Seed shared by the data split and the classifier so repeated runs print the
# same predictions and the same score.
SEED = 2

# Load the iris dataset bundled with scikit-learn.
dataset = load_iris()
x = dataset.data
y = dataset.target

# Data preprocessing (cleaning: missing values, duplicate rows, outliers,
# normalization) -- not performed in this script.
# Feature extraction -- not performed; the features are used as loaded.

# Split the data.
# NOTE(review): train_size=0.25 puts only 25% of the samples in the training
# set and evaluates on the remaining 75%. If the usual 75/25 train/test split
# was intended this should be test_size=0.25 -- confirm before changing, since
# it alters the reported score.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.25, random_state=SEED
)

# Train the model. The tree is seeded as well: without random_state the
# estimator may break ties between equally good splits differently on each
# run, making the output non-reproducible even though the split is fixed.
model = DecisionTreeClassifier(random_state=SEED)
model.fit(x_train, y_train)

# Predict the class of every held-out sample and show the raw predictions.
pred_y = model.predict(x_test)
print(pred_y)

# Model accuracy on the held-out samples.
print(model.score(x_test, y_test))
# Visualization: two side-by-side scatter plots of the test samples, using
# feature columns 0 and 2 as the x/y coordinates. The left panel groups the
# points by predicted class, the right panel by true class, so disagreements
# between the two show up as points whose colour/marker differs.

# (class label, colour, marker) for each of the three classes.
CLASS_STYLES = (
    (0, 'red', '1'),
    (1, 'green', '2'),
    (2, 'blue', '3'),
)


def _scatter_by_class(points, labels):
    """Scatter ``points`` on the current axes, one colour/marker per class.

    Args:
        points: 2-D array indexed as ``[sample, feature]``; columns 0 and 2
            are plotted as x and y.
        labels: array holding one class label per row of ``points``.
            Assumed to be a NumPy array so that ``labels == value`` yields a
            boolean mask (true for the arrays this script passes in).
    """
    for label, colour, marker in CLASS_STYLES:
        # A boolean mask selects every sample of this class, so each class is
        # drawn with a single scatter call instead of one call per point.
        members = labels == label
        plt.scatter(points[members, 0], points[members, 2], c=colour, marker=marker)


# Left panel: samples grouped by the model's prediction.
plt.subplot(121)
plt.title('Predicted')
_scatter_by_class(x_test, pred_y)

# Right panel: the same samples grouped by their true label.
plt.subplot(122)
plt.title('Actual')
_scatter_by_class(x_test, y_test)

plt.show()