# Data: load_iris from sklearn.datasets
# Model: decision tree
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import matplotlib as mpl
# Decision-tree classifier on the iris dataset: load the data, split it into
# train/test parts, fit the tree and print the accuracy on the test part.
iris = load_iris()
print(iris.data.shape)

# Build a DataFrame whose columns are named after iris.feature_names,
# then append the class labels as a final 'target' column.
# Signature reminder: pd.DataFrame(data, index=..., columns=...)
data = pd.DataFrame(iris.data, columns=iris.feature_names)
data['target'] = iris.target

# Positional slicing of a DataFrame requires .iloc.
# 'target' is the last column, so [:, :-1] keeps every feature column;
# slicing with [:, :-2] would also discard the last feature.
x = data.iloc[:, :-1]
# Select the labels as a 1-D Series rather than a one-column DataFrame.
y = data['target']

x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.75, random_state=40)

# random_state is fixed so that repeated runs build the same tree.
tree_clf = DecisionTreeClassifier(
    max_depth=5, criterion='entropy', random_state=40)
tree_clf.fit(x_train, y_train)

y_predict = tree_clf.predict(x_test)
print('acc', accuracy_score(y_test, y_predict))
# Model: random forest
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
# Random-forest classifier on the iris dataset: load the data, split it into
# train/test parts, fit the forest and print the accuracy on the test part.
iris = load_iris()

# Feature columns named after iris.feature_names, plus a final 'target'
# column holding the class labels.
data = pd.DataFrame(iris.data, columns=iris.feature_names)
data['target'] = iris.target

# 'target' is the last column, so [:, :-1] selects all feature columns.
x = data.iloc[:, :-1]
# Select the labels as a 1-D Series: passing a one-column DataFrame makes
# the forest's fit() emit a DataConversionWarning about a column-vector y.
y = data['target']

# Train on 75% of the samples and evaluate on the remaining 25%
# (test_size=0.75 would leave only a quarter of the data for training).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.75, random_state=40)

# max_leaf_nodes caps the number of leaf nodes per tree; use either it or
# max_depth to limit tree size. n_jobs=-1 uses all available cores instead
# of a hard-coded worker count, and random_state makes the run reproducible.
random_forest = RandomForestClassifier(
    n_estimators=20, max_leaf_nodes=16, n_jobs=-1, random_state=40)
random_forest.fit(x_train, y_train)

y_predict = random_forest.predict(x_test)
p = accuracy_score(y_test, y_predict)
print(p)