逻辑回归(公式推导+numpy实现)

公式推导

| 含义 | 公式 | 维度 |
| --- | --- | --- |
| 输入(矩阵形式) | $\mathbf X= \begin{bmatrix}-\,{\mathbf x^{(1)}}^T\,- \\ -\,{\mathbf x^{(2)}}^T\,- \\ \cdots \\ -\,{\mathbf x^{(i)}}^T\,- \\ \cdots \\ -\,{\mathbf x^{(m)}}^T\,-\end{bmatrix}$ | $m\times n$ |
| 输入 | $\mathbf x^{(i)}=\begin{bmatrix} x_{1}^{(i)} & x_{2}^{(i)} & \cdots & x_{j}^{(i)} & \cdots & x_{n}^{(i)}\end{bmatrix}^T$ | $n\times 1$ |
| 标签 | $\mathbf y=\begin{bmatrix} y^{(1)} & y^{(2)} & \cdots & y^{(i)} & \cdots & y^{(m)}\end{bmatrix}^T$ | $m\times 1$ |
| 参数 | $\mathbf w=\begin{bmatrix} w_{1} & w_{2} & \cdots & w_{j} & \cdots & w_{n}\end{bmatrix}^T$ | $n\times 1$ |
| 输出 | $\begin{aligned}f_{\mathbf w,b}(\mathbf x^{(i)}) &= g(\mathbf w^T\mathbf x^{(i)} + b) \\ g(z) &= \frac{1}{1+e^{-z}}\end{aligned}$ | 标量 |
| 输出(矩阵形式) | $f_{\mathbf w,b}(\mathbf X) = g(\mathbf X \mathbf w + b)$ | $m\times 1$ |
| 预测 | $\hat{y}^{(i)}= \begin{cases} 1 & \text{if } f_{\mathbf w,b}(\mathbf x^{(i)}) \ge 0.5 \\ 0 & \text{if } f_{\mathbf w,b}(\mathbf x^{(i)}) < 0.5 \end{cases}$ | 标量 |
| 损失函数 | $\begin{aligned}cost^{(i)} &= \begin{cases} -\log\left(f_{\mathbf w,b}(\mathbf x^{(i)})\right) & \text{if } y^{(i)}=1 \\ -\log\left(1 - f_{\mathbf w,b}(\mathbf x^{(i)})\right) & \text{if } y^{(i)}=0 \end{cases} \\ &= -y^{(i)} \log\left(f_{\mathbf w,b}(\mathbf x^{(i)})\right) - \left(1 - y^{(i)}\right) \log\left(1 - f_{\mathbf w,b}(\mathbf x^{(i)})\right)\end{aligned}$ | 标量 |
| 代价函数 | $\begin{aligned}J(\mathbf w,b) &= \frac{1}{m} \sum\limits_{i = 1}^{m} cost^{(i)} + \frac{\lambda}{2m}\sum\limits_{j = 1}^{n} w_{j}^2 \\ &= \frac{1}{m}\left(-\mathbf y^T \log\left(f_{\mathbf w,b}(\mathbf X)\right) - (\mathbf 1-\mathbf y)^T \log\left(\mathbf 1 - f_{\mathbf w,b}(\mathbf X)\right)\right) + \frac{\lambda}{2m}\mathbf w^T\mathbf w\end{aligned}$ | 标量($\mathbf 1$ 为 $m\times 1$ 的全 1 向量) |
| 梯度下降 | $\begin{aligned}w_j &:= w_j - \alpha \frac{\partial J(\mathbf w,b)}{\partial w_j} \\ b &:= b - \alpha \frac{\partial J(\mathbf w,b)}{\partial b} \\ \frac{\partial J(\mathbf w,b)}{\partial w_j} &= \frac{1}{m} \sum\limits_{i = 1}^{m} \left(f_{\mathbf w,b}(\mathbf x^{(i)}) - y^{(i)}\right)x_{j}^{(i)} + \frac{\lambda}{m} w_j \\ \frac{\partial J(\mathbf w,b)}{\partial b} &= \frac{1}{m} \sum\limits_{i = 1}^{m} \left(f_{\mathbf w,b}(\mathbf x^{(i)}) - y^{(i)}\right)\end{aligned}$ | 标量 |
| 梯度下降(矩阵形式) | $\begin{aligned}\mathbf w &:= \mathbf w-\alpha\frac{\partial J(\mathbf w,b)}{\partial \mathbf w} \\ \frac{\partial J(\mathbf w,b)}{\partial \mathbf w} &= \frac{1}{m}\mathbf X^T\left(f_{\mathbf w,b}(\mathbf X) -\mathbf y\right)+\frac{\lambda}{m} \mathbf w\end{aligned}$ | $n\times 1$ |

numpy实现

def zscore_normalize_features(X):
    """Standardize each column of X to zero mean and unit standard deviation.

    Args:
        X: array whose first axis indexes samples; statistics are taken
            along axis 0.

    Returns:
        Tuple ``(X_norm, mu, sigma)``: the normalized data, the per-column
        mean, and the per-column scale that was divided by. The scale is the
        population standard deviation (``np.std`` default), except that a
        zero standard deviation is reported as 1 so the same ``mu``/``sigma``
        can be reused on new data without dividing by zero.
    """
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    # A constant column has zero spread; dividing by it would produce NaN/inf.
    # Using a scale of 1 maps such a column to all zeros instead.
    sigma = np.where(sigma == 0, 1.0, sigma)
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma
# SIGMOID
def sig(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), evaluated without overflow.

    Args:
        z: scalar or array-like of real values.

    Returns:
        Sigmoid of ``z``, element-wise, with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) is always in (0, 1], so it can never overflow, unlike
    # exp(-z) for large negative z.
    exp_neg_abs = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z). Same value, stable form.
    numerator = np.where(z >= 0, 1.0, exp_neg_abs)
    return numerator / (1.0 + exp_neg_abs)
# f_wb
def compute_f_wb(X,w,b):
    """Model output: the sigmoid of the linear score ``X·w + b``.

    With X of shape (m, n) and w of shape (n, 1) the result is (m, 1).
    """
    linear_score = np.dot(X, w) + b
    return sig(linear_score)
# j_wb
def compute_cost(X,y,w,b,lambda_,f_wb_function):
    """Regularized mean cross-entropy cost J(w, b).

    J = (1/m) * (-y^T log(f) - (1 - y)^T log(1 - f)) + lambda_/(2m) * w^T w

    Args:
        X: (m, n) feature matrix.
        y: (m, 1) column of 0/1 labels.
        w: (n, 1) weight column.
        b: bias term (not regularized).
        lambda_: L2 regularization strength.
        f_wb_function: callable ``(X, w, b)`` returning the (m, 1) model
            outputs.

    Returns:
        The cost as a scalar.
    """
    m = X.shape[0]
    f_wb = f_wb_function(X, w, b)  # (m, 1)
    cross_entropy = -np.dot(y.T, np.log(f_wb)) - np.dot((1 - y).T, np.log(1 - f_wb))
    # Parenthesize 2*m: `lambda_ / 2 * m` evaluates as (lambda_ / 2) * m and
    # would inflate the penalty by a factor of m**2.
    l2_penalty = (lambda_ / (2 * m)) * np.dot(w.T, w)
    j_wb = cross_entropy / m + l2_penalty  # (1, 1)
    return j_wb[0, 0]
# dj_dw,dj_db
def compute_gradient(X, y, w, b, lambda_,f_wb_function):
    """Gradient of the regularized logistic-regression cost.

    dJ/dw = (1/m) * X^T (f - y) + (lambda_/m) * w
    dJ/db = (1/m) * sum(f - y)

    Args:
        X: (m, n) feature matrix.
        y: (m, 1) column of 0/1 labels.
        w: (n, 1) weight column.
        b: bias term (not regularized).
        lambda_: L2 regularization strength.
        f_wb_function: callable ``(X, w, b)`` returning the (m, 1) model
            outputs.

    Returns:
        Tuple ``(dj_dw, dj_db)``: an (n, 1) array and a scalar.
    """
    m = X.shape[0]
    residual = f_wb_function(X, w, b) - y  # (m, 1)

    # The penalty in the cost is lambda_/(2m) * w^T w, so its derivative is
    # scaled by the sample count m, not the feature count n.
    dj_dw = np.dot(X.T, residual) / m + (lambda_ / m) * w
    dj_db = np.sum(residual) / m
    return dj_dw, dj_db
# w,b,j_history,w_history
def gradient_descent(X, y, w, b, cost_function, gradient_function, f_wb_function,alpha, num_iters,lambda_):
    """Run batch gradient descent for a fixed number of iterations.

    Args:
        X, y: training data, passed through unchanged to the callbacks.
        w: initial weights (left unmodified; a copy is updated).
        b: initial bias.
        cost_function: callable ``(X, y, w, b, lambda_, f_wb_function)``
            returning the cost.
        gradient_function: callable with the same arguments returning
            ``(dj_dw, dj_db)``.
        f_wb_function: model function forwarded to both callbacks.
        alpha: learning rate.
        num_iters: number of update steps.
        lambda_: regularization strength forwarded to both callbacks.

    Returns:
        Tuple ``(w, b, J_history, w_history)``: the final parameters, the
        cost after each step, and a copy of the weights after each step.
    """
    J_history = []
    w_history = []
    w_temp = copy.deepcopy(w)  # keep the caller's initial weights untouched
    b_temp = b
    for _ in range(num_iters):
        # Both gradients are evaluated at the current (w, b) before either
        # parameter is changed.
        dj_dw, dj_db = gradient_function(X, y, w_temp, b_temp, lambda_, f_wb_function)
        w_temp = w_temp - alpha * dj_dw
        b_temp = b_temp - alpha * dj_db
        J_history.append(cost_function(X, y, w_temp, b_temp, lambda_, f_wb_function))
        # Store a copy so later in-place edits of the returned weights cannot
        # alter the recorded history.
        w_history.append(copy.deepcopy(w_temp))
    return w_temp, b_temp, J_history, w_history

样本点

# Load the training set and turn the labels into an (m, 1) column.
x_train, y_train = load_data("data/ex2data2.txt")
y_train = y_train.reshape(-1, 1)
x_train.shape, y_train.shape

# Scatter the two classes as separate traces so each gets its own legend entry.
fig = go.Figure()
for label, trace_name in ((0, "第一类"), (1, "第二类")):
    rows = np.where(y_train == label)[0]  # row indices of samples with this label
    fig.add_trace(
        go.Scatter(
            x=x_train[rows, 0],
            y=x_train[rows, 1],
            mode="markers",
            name=trace_name,
        )
    )
fig.update_layout(width=1000, height=618)
fig.show()

在这里插入图片描述

特征

def map_feature(X1, X2,degree):
    """Expand two features into all monomials of total degree 1..degree.

    Columns are ordered by total degree and, within a degree, by increasing
    power of X2: X1, X2, X1^2, X1*X2, X2^2, ...  The constant term is not
    included. Scalars are treated as length-1 arrays.

    Returns:
        Array with one row per sample and one column per monomial.
    """
    x1 = np.atleast_1d(X1)
    x2 = np.atleast_1d(X2)
    monomials = [
        x1 ** (total - x2_power) * x2 ** x2_power
        for total in range(1, degree + 1)
        for x2_power in range(total + 1)
    ]
    return np.stack(monomials, axis=1)
    
# Highest total degree of the polynomial terms generated by map_feature.
feature_power=3
# Expand the first two columns of x_train into polynomial features.
features=map_feature(x_train[:, 0], x_train[:, 1],feature_power)
features.shape
# Standardize the features; mu and sigma are kept so that new points
# (e.g. the decision-boundary grid) can be normalized the same way.
x_,mu,sigma=zscore_normalize_features(features)
y_=y_train
x_.shape,y_.shape

梯度下降

# m samples, n (polynomial) features.
m,n=x_.shape
# Start from all-zero weights (as an (n, 1) column) and zero bias.
initial_w = np.zeros((n,1))
initial_b = 0
# Number of gradient-descent steps and the learning rate.
iterations = 1500
alpha = 0.3
# lambda_ = 0 makes the regularization terms vanish.
lambda_=0
w,b,J_history,w_history = gradient_descent(x_ ,y_, initial_w, initial_b, compute_cost, compute_gradient, compute_f_wb,alpha, iterations,lambda_)

# Learning curve: cost recorded after each iteration (1-based on the x axis).
learning_curve = go.Scatter(
    x=np.arange(1, iterations + 1),
    y=J_history,
    name="学习曲线",
    mode="markers+lines",
)

fig = go.Figure()
fig.update_layout(width=1000, height=618)
fig.add_trace(learning_curve)
fig.update_layout(xaxis_title="迭代次数", yaxis_title="J_wb")
fig.show()

在这里插入图片描述

决策边界

# Regular grid over the plotted region of the two raw input features.
u = np.linspace(-1.5, 1.5, 100)
v = np.linspace(-1.5, 1.5, 100)

# Evaluate the model on every grid point in one vectorized pass: map to
# polynomial features, normalize with the training mu/sigma, then predict.
# indexing="ij" with row-major ravel/reshape gives
# z[i, j] = predicted probability at (x1=u[i], x2=v[j]).
grid_u, grid_v = np.meshgrid(u, v, indexing="ij")
grid_features = map_feature(grid_u.ravel(), grid_v.ravel(), feature_power)
grid_norm = (grid_features - mu.reshape(1, -1)) / sigma.reshape(1, -1)
z = sig(np.dot(grid_norm, w) + b).reshape(len(u), len(v))

fig = go.Figure()
fig.update_layout(width=1000, height=618)
fig.add_trace(
    go.Contour(
        # Plotly reads z[row][col] as the value at (y[row], x[col]); z is
        # indexed [u, v] = [x, y], so it must be transposed to line up.
        z=z.T,
        contours_coloring='lines',
        x=u,
        y=v,
    )
)
# Overlay the training samples, one trace per class.
for label, trace_name in ((0, "第一类"), (1, "第二类")):
    rows = np.where(y_train == label)[0]  # row indices of samples with this label
    fig.add_trace(
        go.Scatter(
            x=x_train[rows, 0],
            y=x_train[rows, 1],
            mode="markers",
            name=trace_name,
        )
    )
fig.show()

在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

西红柿爱喝水

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值