$f(x) = \sum_{m=1}^{M} \alpha_{m}G_{m}(x)$

当损失函数为指数损失函数$L(y, f(x))=\exp[-yf(x)]$时，其学习的具体操作等价于AdaBoost算法学习的具体操作。

\begin{aligned} f_{m-1}(x) &= f_{m-2}(x) + \alpha_{m-1}G_{m-1}(x) \\ &= \alpha_{1}G_{1}(x) + \cdots + \alpha_{m-1}G_{m-1}(x) \end{aligned}

$f_{m}(x) = f_{m-1}(x) + \alpha_{m}G_{m}(x)$

\begin{aligned} (\alpha_m,G_{m}(x)) & = \arg\min_{\alpha,G} \sum_{i=1}^N L(y_i, f_{m-1}(x_i)+\alpha G(x_i)) \\ & = \arg\min_{\alpha,G} \sum_{i=1}^N \exp[-y_i(f_{m-1}(x_i)+\alpha G(x_i))] \tag{1} \end{aligned}

$L(y_i,f(x)) = (y_i - f(x))^2$

\begin{aligned} L(y_i,f_{m-1}(x_i)+\alpha G(x_i)) & = (y_i - f_{m-1}(x_i) - \alpha G(x_i))^2 \\ & = (r_{mi} - \alpha G(x_i))^2 \end{aligned}

记$\overline{w}_{mi} = \exp[-y_i f_{m-1}(x_i)]$，它既不依赖$\alpha$也不依赖$G$，只与$f_{m-1}(x)$有关，因此式(1)可改写为：

\begin{aligned} (\alpha_m,G_m) = \arg\min_{\alpha,G} \sum_{i=1}^N \overline{w}_{mi} \exp(-\alpha y_i G(x_i)) \end{aligned} \tag{2}

\begin{aligned} \sum_{i=1}^N & \overline{w}_{mi} \exp(-\alpha y_i G(x_i)) \\ & = e^{-\alpha} \sum_{y_i=G_m(x_i)} \overline w_{mi} + e^{\alpha} \sum_{y_i \neq G_m(x_i)} \overline w_{mi} \\ & = e^{-\alpha} \sum_{y_i=G_m(x_i)} \overline w_{mi} + e^{\alpha} \sum_{y_i \neq G_m(x_i)} \overline w_{mi} + e^{-\alpha} \sum_{y_i \neq G_m(x_i)} \overline w_{mi} - e^{-\alpha} \sum_{y_i \neq G_m(x_i)} \overline w_{mi} \\ \end{aligned}

\begin{aligned} (e^{\alpha} - e^{-\alpha}) \sum_{i=1}^N \overline w_{mi} I(y_i \neq G_m(x_i)) + e^{-\alpha} \sum_{i=1}^N \overline w_{mi} \tag{3} \end{aligned}

$G^{*}_{m}(x) = \arg\min_G \sum_{i=1}^N \overline w_{mi} I(y_i \neq G(x_i))$此分类器$G^{*}_{m}(x)$即为AdaBoost算法的基本分类器$G_{m}(x)$，因此它是第m轮加权训练数据分类误差率最小的基本分类器。

$\frac {\partial L} {\partial \alpha} = e^{\alpha} \sum_{i=1}^N \overline w_{mi} I(y_i \neq G(x_i)) + e^{-\alpha} \sum_{i=1}^N \overline w_{mi} I(y_i \neq G(x_i)) - e^{-\alpha} \sum_{i=1}^N \overline w_{mi}$

$e^{\alpha} \sum_{i=1}^N \overline w_{mi} I(y_i \neq G(x_i)) = [\sum_{i=1}^N \overline w_{mi} - \sum_{i=1}^N \overline w_{mi} I(y_i \neq G(x_i))] e^{-\alpha}$

$\log \sum_{i=1}^N \overline w_{mi} I(y_i \neq G(x_i)) + \log e^{\alpha} = \log [\sum_{i=1}^N \overline w_{mi} - \sum_{i=1}^N \overline w_{mi} I(y_i \neq G(x_i))] + \log e^{-\alpha}$

$\log e^{\alpha} = \frac {1} {2} \log \frac {\sum_{i=1}^N \overline w_{mi} - \sum_{i=1}^N \overline w_{mi} I(y_i \neq G(x_i))} {\sum_{i=1}^N \overline w_{mi} I(y_i \neq G(x_i))}$

$\alpha_m = \frac {1} {2} \log \frac {\sum_{i=1}^N \overline w_{mi} - \sum_{i=1}^N \overline w_{mi} I(y_i \neq G(x_i))} {\sum_{i=1}^N \overline w_{mi} I(y_i \neq G(x_i))}$

$e_m = \frac {\sum_{i=1}^N \overline w_{mi} I(y_i \neq G(x_i))} {\sum_{i=1}^N \overline w_{mi}}$

$\alpha_m = \frac {1} {2} \log \frac {1 - e_m} {e_m}$

$f_m(x) = f_{m-1}(x) + \alpha_m G_m(x)$

\begin{aligned} \overline w_{m+1,i} & = \exp(-y_i f_m (x_i)) \\ & = \exp(-y_i (f_{m-1}(x_i)+\alpha_m G_m(x_i))) \\ & = \overline w_{m,i} \exp(- \alpha_m y_i G_m(x_i)) \end{aligned}

《统计学习方法》 李航