# SVM的原理及推导

$\begin{aligned} w^Tx+b &= -\sigma \\ w^Tx+b &= +\sigma \end{aligned}$

$\gamma = \frac{2}{\Vert w\Vert}$

\begin{aligned} \mathop{maximize}&~~\frac{2}{\|w\|}\\ s.t. &~~y_i(w^Tx_i+b) -1 \geq 0 , ~i=1,...,m \end{aligned}

\begin{aligned} \mathop{minimize}&~~\frac{1}{2}\|w\|^2\\ s.t. &~~y_i(w^Tx_i+b) -1 \geq 0 , ~i=1,...,m \end{aligned}

$\mathcal{L}(w,b,\lambda)=\frac{1}{2}\|w\|^2-\sum_{i}^{m}\lambda_i[y_i (w^Tx_i+b)-1]$,对偶函数为 $g(\lambda)=\inf_{w,b}\mathcal{L}(w,b,\lambda)$。

\begin{aligned} \frac{\partial \mathcal{L}}{\partial w}&=w-\sum_i^m \lambda_iy_ix_i=0 \\ \frac{\partial \mathcal{L}}{\partial b}&=-\sum_i^m{\lambda_iy_i}=0 \end{aligned}

\begin{aligned} \mathcal{L}&=\frac{1}{2}w^Tw+\sum_i^m\lambda_i-\sum_i^m\lambda_iy_iw^Tx_i-\sum_i^m\lambda_iy_ib \\ &=\frac{1}{2}\|\sum_i^m\lambda_iy_ix_i\|^2+\sum_i^m\lambda_i-\sum_i^m\lambda_iy_i(\sum_j^m\lambda_jy_jx_j)^Tx_i\\ &=\sum_i^m\lambda_i-\frac{1}{2}(\sum_i^m\lambda_iy_ix_i^T)(\sum_j^m\lambda_jy_jx_j)\\ &=\sum_i^m\lambda_i-\frac{1}{2}\sum_i^m\sum_j^m\lambda_i\lambda_jy_iy_jx_i^Tx_j \end{aligned}

\begin{aligned} \mathop{maximize}&~~\sum_i^m\lambda_i-\frac{1}{2}\sum_i^m\sum_j^m\lambda_i\lambda_jy_iy_jx_i^Tx_j \\ s.t.&~~\lambda_i \geq 0, ~i=1,...,m \\ &~~\sum_i^m\lambda_iy_i=0 \end{aligned}

## 带有松弛变量的SVM

\begin{aligned} \mathop{minimize}&~~\frac{1}{2}\|w\|^2+C\sum_i^m \xi_i\\ s.t.&~~y_i(w^Tx_i+b) \geq 1-\xi_i,~i=1,...,m\\ &~~\xi_i \geq 0, ~i=1, ..., m \end{aligned}

$\mathcal{L}(w,b,\xi,\lambda,\mu) = \frac{1}{2}\|w\|^2+C\sum_i^m\xi_i+\sum_i^m\lambda_i[1-\xi_i-y_i(w^Tx_i+b)]-\sum_i^m\mu_i\xi_i$,对偶函数为 $g(\lambda,\mu)=\inf_{w,b,\xi}\mathcal{L}(w,b,\xi,\lambda,\mu)$。
$\mathcal{L}$ 对于 $w,b,\xi$ 同样是凸(convex)函数,因此同样令其对这三个变量的偏导数为零:
\begin{aligned} &\frac{\partial \mathcal{L}}{\partial w} = w-\sum_i^m\lambda_iy_ix_i=0 \\ &\frac{\partial \mathcal{L}}{\partial b} = -\sum_i^m\lambda_iy_i = 0 \\ &\frac{\partial \mathcal{L}}{\partial \xi_i}=C-\mu_i-\lambda_i=0 \end{aligned}

\begin{aligned} \mathcal{L} &= \frac{1}{2}\|\sum_i^m\lambda_iy_ix_i\|^2+C\sum_i^m\xi_i+\sum_i^m\lambda_i-\sum_i^m\lambda_i\xi_i-\sum_i^m\lambda_iy_ix_i^T\sum_j^m\lambda_jy_jx_j-\sum_i^m\lambda_iy_ib-\sum_i^m\mu_i\xi_i\\ &=C\sum_i^m\xi_i+\sum_i^m\lambda_i-\frac{1}{2}\sum_i^m\sum_j^m\lambda_i\lambda_jy_iy_jx_i^Tx_j-\sum_i^m\lambda_i\xi_i-\sum_i^m\mu_i\xi_i\\ &=\sum_i^m\lambda_i-\frac{1}{2}\sum_i^m\sum_j^m\lambda_i\lambda_jy_iy_jx_i^Tx_j \end{aligned}

\begin{aligned} \mathop{maximize}&~~\sum_i^m\lambda_i-\frac{1}{2}\sum_i^m\sum_j^m\lambda_i\lambda_jy_iy_jx_i^Tx_j \\ s.t.&~~\lambda_i \geq 0, ~i=1,...,m \\ &~~\sum_i^m\lambda_iy_i=0 \\ &~~\lambda_i \leq C, ~i=1,...,m \end{aligned}

## SVR的推导

\begin{aligned} \mathop{minimize}&~~\frac{1}{2}\|w\|^2+C\sum_i^m(\xi_i+\xi_i^*)\\ s.t.&~~y_i-w^Tx_i-b\leq\varepsilon+\xi_i,~i=1,...,m \\ &~~w^Tx_i+b-y_i \leq \varepsilon+\xi_i^*,~i=1,...,m\\ &~~\xi_i\geq 0,~i=1,...,m\\ &~~\xi_i^* \geq 0,~i=1,...,m \end{aligned}

\begin{aligned} \mathcal{L}(w,b,\xi,\xi^*,\lambda,\lambda^*,\mu,\mu^*) ={}&\frac{1}{2}\|w\|^2+C\sum_i^m(\xi_i+\xi_i^*)\\ &+\sum_i^m\lambda_i(y_i-w^Tx_i-b-\varepsilon-\xi_i)\\& +\sum_i^m\lambda_i^*(w^Tx_i+b-y_i-\varepsilon-\xi_i^*) \\ &-\sum_i^m\mu_i\xi_i-\sum_i^m\mu_i^*\xi_i^* \end{aligned}

对偶函数为 $g(\lambda,\lambda^*,\mu,\mu^*)=\inf_{w,b,\xi,\xi^*}\mathcal{L}(w,b,\xi,\xi^*,\lambda,\lambda^*,\mu,\mu^*)$。

\begin{aligned} &\frac{\partial \mathcal{L}}{\partial w} = w-\sum_i^m(\lambda_i-\lambda_i^*)x_i=0 \\ &\frac{\partial \mathcal{L}}{\partial b} = -\sum_i^m\lambda_i+\sum_i^m\lambda_i^*=0 \\ &\frac{\partial \mathcal{L}}{\partial \xi_i^{(*)}} = C-\lambda_i^{(*)}-\mu_i^{(*)}=0\\ \end{aligned}

\begin{aligned} \mathop{maximize}&~~-\frac{1}{2}\sum_i^m\sum_j^m(\lambda_i-\lambda_i^*)(\lambda_j-\lambda_j^*)x_i^Tx_j+\sum_i^m(\lambda_i-\lambda_i^*)y_i-\varepsilon\sum_i^m(\lambda_i+\lambda_i^*) \\ s.t.&~~\sum_i^m(\lambda_i^*-\lambda_i)=0\\ &~~0 \leq \lambda_i^{(*)} \leq C,~i=1,...,m \end{aligned}