示例代码
iris鸢尾花数据集是入门级的分类数据集(表格数据,共150个样本、4个数值特征、3个类别),通过这个简单的demo,我们可以快速上手xgboost的使用,具体代码如下:
from sklearn.datasets import load_iris
from xgboost.sklearn import XGBClassifier
from xgboost import plot_importance
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Demo: train an XGBoost classifier on the Iris dataset, report the accuracy
# on a held-out test split, and plot the learned feature importances.

# Load the sample dataset: feature matrix and class labels.
iris = load_iris()
x, y = iris.data, iris.target

# Split the dataset: 20% is held out for testing; the fixed random_state
# keeps the split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=123457
)

clf = XGBClassifier(
    booster='gbtree',
    objective='multi:softmax',
    num_class=3,             # Iris has three target classes
    gamma=0.1,               # minimum loss reduction required to split a node
    max_depth=6,
    reg_lambda=2,            # L2 regularisation on leaf weights
    subsample=0.7,           # fraction of rows sampled for each tree
    colsample_bytree=0.7,    # fraction of features sampled for each tree
    min_child_weight=3,
    # The three settings below use the scikit-learn wrapper's own parameter
    # names instead of the native-booster aliases `eta`, `seed` and `nthread`.
    learning_rate=0.1,
    random_state=1000,
    n_jobs=4,
)

# Train the model.
# NOTE: `eval_metric` is intentionally not passed to fit(). That keyword was
# deprecated on fit() in XGBoost 1.6 and is rejected by recent releases, and
# without an `eval_set` the metric was never evaluated anyway, so dropping it
# does not change the trained model.
clf.fit(x_train, y_train)

# Predict on the test set.
y_pred = clf.predict(x_test)

# Compute the accuracy.
accuracy = accuracy_score(y_test, y_pred)
print('accuracy:%2.2f%%' % (accuracy * 100))

# Show the feature importances.
plot_importance(clf)
plt.show()
分类结果
最终分类准确率为:
打印特征重要性: