使用Adaboost进行分类的基本思想是基于弱分类器的Boosting, 在每次迭代的样本中,增加上一次迭代中分类错误的样本的权重。
下面是基本的使用Adaboost和决策树(层数为1)对make_circles数据集进行分类。
# coding: utf-8
# References
# - https://scikit-learn.org/stable/modules/ensemble.html#adaboost
# - https://xavierbourretsicotte.github.io/AdaBoost.html
# - [1] T. Hastie, R. Tibshirani and J. Friedman, "Elements of Statistical Learning Ed. 2", Springer, 2009.
from sklearn.tree import DecisionTreeClassifier
import numpy as np
from matplotlib.colors import ListedColormap
from sklearn import datasets
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import utils
def plot_dataset(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray, y_test: np.ndarray) -> None:
    """Create a scatter plot of the given train/test split of a 2-D dataset.

    With the two-color map below, points labeled 0 are drawn red (#FF0000)
    and points labeled 1 are drawn blue (#0000FF).  Training points are
    displayed as dots ("o") and test points as crosses ("x").  The figure
    is shown immediately via ``plt.show()``.

    Parameters
    ----------
    X_train, X_test : np.ndarray
        2-D feature arrays of shape (n_samples, 2) — the two columns are
        used as the x/y plot coordinates.
    y_train, y_test : np.ndarray
        Class labels; assumed to take the two values 0 and 1 so that the
        two-entry colormap covers them.
    """
    plt.figure(figsize=(6, 5))
    # Same colormap for both scatters so train/test colors agree per class.
    plt.scatter(*X_train.T, c=y_train, cmap=ListedColormap(["#FF0000", "#0000FF"]), label="Training Points", marker="o")
    plt.scatter(*X_test.T, c=y_test, cmap=ListedColormap(["#FF0000", "#0000FF"]), label="Test Points", marker="x")
    plt.xlabel("$X_0$")
    plt.ylabel("$X_1$")
    plt.legend()
    plt.show()
def plot_decision_boundary_stump(stump: DecisionTreeClassifier, X: np.ndarray, y: np.ndarray, N=1000) -> None:
"""Plot the decision boundary for a tree stump and scatters plot of the training data"""
x_min, x_max=X[:,0].min()-