Logistic Regression

Objective Function

Whether a model is linear or nonlinear is determined by its decision boundary; since logistic regression has a linear decision boundary, it is a linear classifier. The model is:
$$
\begin{aligned}
P\left(y_{i}=1 \mid x_{i}\right) &= \frac{1}{1+e^{-\beta^{T} x_{i}}} \\
P\left(y_{i}=0 \mid x_{i}\right) &= 1-\frac{1}{1+e^{-\beta^{T} x_{i}}}
\end{aligned}
$$
The objective is written with the cross-entropy loss:
$$
\begin{aligned}
f(\beta) &= -\sum_{i=1}^{n}\left[y_{i} \ln p\left(y_{i}=1 \mid x_{i}\right)+\left(1-y_{i}\right) \ln p\left(y_{i}=0 \mid x_{i}\right)\right] \\
&= -\sum_{i=1}^{n}\left[y_{i} \ln \left(\frac{1}{1+e^{-\beta^{T} x_{i}}}\right)+\left(1-y_{i}\right) \ln \left(\frac{e^{-\beta^{T} x_{i}}}{1+e^{-\beta^{T} x_{i}}}\right)\right] \\
&= \sum_{i=1}^{n}\left[\ln \left(1+e^{\beta^{T} x_{i}}\right)-y_{i} \beta^{T} x_{i}\right]
\end{aligned}
$$
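As a quick illustration, here is a minimal NumPy sketch of this loss in its simplified form (the names X, y, beta are illustrative; X is assumed to already include the bias column, as in the implementations below):

import numpy as np

def cross_entropy_loss(beta, X, y):
    """f(beta) = sum_i [ ln(1 + exp(beta^T x_i)) - y_i * beta^T x_i ]"""
    z = X @ beta                                   # beta^T x_i for every sample
    return np.sum(np.log(1.0 + np.exp(z)) - y * z)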

Gradient Descent

Differentiating f(β) gives the gradient and the Hessian:

$$
\begin{aligned}
\frac{\partial f(\beta)}{\partial \beta} &= -\sum_{i=1}^{n} x_{i}\left(y_{i}-P\left(y_{i}=1 \mid x_{i}\right)\right) \\
\frac{\partial^{2} f(\beta)}{\partial \beta\, \partial \beta^{T}} &= \sum_{i=1}^{n} x_{i} x_{i}^{T}\, P\left(y_{i}=1 \mid x_{i}\right) P\left(y_{i}=0 \mid x_{i}\right)
\end{aligned}
$$

Update rule:
$$
\beta^{t+1}=\beta^{t}-\alpha \frac{\partial f(\beta)}{\partial \beta}
$$
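Since the Hessian above is available in closed form, Newton's method could also be used instead of this first-order update. A minimal sketch (illustrative only, not part of the implementation below; beta, X, y are assumed NumPy arrays with the bias column already in X):

import numpy as np

def newton_step(beta, X, y):
    """One Newton update: beta <- beta - H^{-1} g, with gradient g and Hessian H as above."""
    p = 1.0 / (1.0 + np.exp(-X @ beta))         # P(y_i = 1 | x_i)
    grad = -X.T @ (y - p)                        # gradient of f(beta)
    H = (X * (p * (1 - p))[:, None]).T @ X       # Hessian of f(beta)
    return beta - np.linalg.solve(H, grad)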

Gradient Descent Implementations

Batch Gradient Descent (BGD)

    def BatchGradDesc(self):
        """Batch Gradient Descent"""
        m,n = self.X.shape
        b = np.ones((m,1))
        X = np.concatenate([b,self.X],axis=1) # prepend a column of ones as the intercept term
        X = np.mat(X)
        y = np.mat(self.y).transpose()
        weights = np.zeros((n+1,1)) # initialize weights
        for i in range(self.max_iter): # gradient descent iterations
            p = self.sigmoid(X * weights)
            error = p - y
            weights = weights - self.lr * X.transpose() * error # full-batch gradient step
        return weights.getA()

Stochastic Gradient Descent (SGD)

    def StocGradDesc(self):
        """Stochastic Gradient Descent"""
        b = np.ones((self.X.shape[0],1))
        data = np.concatenate([b,self.X,self.y[:,np.newaxis]],axis=1) # prepend a column of ones as the intercept term
        data = pd.DataFrame(data).sample(self.max_iter, replace=True) # draw max_iter rows with replacement
        data.index = range(data.shape[0])
        X = np.mat(data.iloc[:,:-1].values)
        y = np.mat(data.iloc[:,-1].values).transpose()
        m,n = X.shape
        weights = np.zeros((n,1)) # initialize weights
        for i in range(m): # one update per sampled row
            p = self.sigmoid(X[i,:] * weights)
            error = p - y[i]
            weights = weights - self.lr * X[i,:].transpose() * error
        return weights.getA()

Mini-Batch Gradient Descent (MBGD)

    def MiniBatchBGradDesc(self):
        """Mini-Batch Gradient Descent"""
        batch = 20
        b = np.ones((self.X.shape[0],1))
        data = np.concatenate([b,self.X,self.y[:,np.newaxis]],axis=1) # prepend a column of ones as the intercept term
        data = pd.DataFrame(data).sample(batch*self.max_iter, replace=True) # draw batch*max_iter rows with replacement
        data.index = range(data.shape[0])
        X = np.mat(data.iloc[:,:-1].values)
        y = np.mat(data.iloc[:,-1].values).transpose()
        m,n = X.shape
        weights = np.zeros((n,1)) # initialize weights
        index1 = 0
        index2 = batch
        for i in range(self.max_iter): # one update per batch of `batch` rows
            p = self.sigmoid(X[index1:index2,:] * weights)
            error = p - y[index1:index2]
            weights = weights - self.lr * X[index1:index2,:].transpose() * error
            index1 += batch
            index2 += batch
        return weights.getA()
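The method above draws batch*max_iter rows with replacement up front. A more conventional variant (a sketch under that assumption, not the author's implementation) reshuffles the data every epoch and walks through it in consecutive batches:

import numpy as np

def minibatch_gd(X, y, lr=0.01, epochs=100, batch=20):
    """Mini-batch gradient descent with per-epoch shuffling (illustrative sketch).
    X is assumed to already contain the bias column."""
    m, n = X.shape
    w = np.zeros(n)
    for _ in range(epochs):
        idx = np.random.permutation(m)              # reshuffle each epoch
        for start in range(0, m, batch):
            sl = idx[start:start + batch]
            p = 1.0 / (1.0 + np.exp(-X[sl] @ w))    # predicted P(y=1|x) for the batch
            w -= lr * X[sl].T @ (p - y[sl])         # gradient step on the batch loss
    return w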

Full Code

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler

# 2D test data
#X, y = make_blobs(n_samples=20, centers=2, n_features=2, cluster_std=1, random_state=0)

breast = load_breast_cancer()
X = breast.data
y = breast.target
scaler = StandardScaler()
X = scaler.fit_transform(X)


class Logistic(object):
    def __init__(self, X, y, lr=0.01, max_iter=1000, solver='BatchGradDesc'):
        self.X = X
        self.y = y
        self.lr = lr
        self.max_iter = max_iter
        self.solver = solver
    
    def sigmoid(self, x):
        """sigmoid函数"""
        return 1.0 / (1 + np.exp(-x))

    def pre(self, weights):
        """预测函数"""
        m,n = self.X.shape
        b = np.ones((m,1))
        X = np.concatenate([b,self.X],axis=1)
        prob = self.sigmoid(np.dot(X,weights))
        pre = np.where(prob>0.5, 1, 0)
        return pre

    def Accuracy(self, pre):
        """计算正确率"""
        m = self.y.shape[0]
        y = self.y.reshape(m,1)
        n_accuracy = (pre == y).sum()
        accuracy = round(n_accuracy *100 / m ,2)
        return accuracy

    def BatchGradDesc(self):
        """Batch Gradient Descent"""
        m,n = self.X.shape
        b = np.ones((m,1))
        X = np.concatenate([b,self.X],axis=1) # prepend a column of ones as the intercept term
        X = np.mat(X)
        y = np.mat(self.y).transpose()
        weights = np.zeros((n+1,1)) # initialize weights
        for i in range(self.max_iter): # gradient descent iterations
            p = self.sigmoid(X * weights)
            error = p - y
            weights = weights - self.lr * X.transpose() * error # full-batch gradient step
        return weights.getA()
    
    def StocGradDesc(self):
        """Stochastic Gradient Descent"""
        b = np.ones((self.X.shape[0],1))
        data = np.concatenate([b,self.X,self.y[:,np.newaxis]],axis=1) # prepend a column of ones as the intercept term
        data = pd.DataFrame(data).sample(self.max_iter, replace=True) # draw max_iter rows with replacement
        data.index = range(data.shape[0])
        X = np.mat(data.iloc[:,:-1].values)
        y = np.mat(data.iloc[:,-1].values).transpose()
        m,n = X.shape
        weights = np.zeros((n,1)) # initialize weights
        for i in range(m): # one update per sampled row
            p = self.sigmoid(X[i,:] * weights)
            error = p - y[i]
            weights = weights - self.lr * X[i,:].transpose() * error
        return weights.getA()

    def MiniBatchBGradDesc(self):
        """Mini-Batch Gradient Descent"""
        batch = 20
        b = np.ones((self.X.shape[0],1))
        data = np.concatenate([b,self.X,self.y[:,np.newaxis]],axis=1) # prepend a column of ones as the intercept term
        data = pd.DataFrame(data).sample(batch*self.max_iter, replace=True) # draw batch*max_iter rows with replacement
        data.index = range(data.shape[0])
        X = np.mat(data.iloc[:,:-1].values)
        y = np.mat(data.iloc[:,-1].values).transpose()
        m,n = X.shape
        weights = np.zeros((n,1)) # initialize weights
        index1 = 0
        index2 = batch
        for i in range(self.max_iter): # one update per batch of `batch` rows
            p = self.sigmoid(X[index1:index2,:] * weights)
            error = p - y[index1:index2]
            weights = weights - self.lr * X[index1:index2,:].transpose() * error
            index1 += batch
            index2 += batch
        return weights.getA()

#    def plot(self, weights):
#        """Plot 2D data and the decision boundary"""
#        plt.scatter(self.X[self.y==1,0],self.X[self.y==1,1])
#        plt.scatter(self.X[self.y==0,0],self.X[self.y==0,1])
#        plt.plot(self.X[:,0], (-self.X[:,0]*weights[1,0]-weights[0,0])/weights[2,0])

    def run(self):
        if self.solver == 'BatchGradDesc':
            weights = self.BatchGradDesc()
        elif self.solver == 'StocGradDesc':
            weights = self.StocGradDesc()
        else:
            weights = self.MiniBatchBGradDesc()
        #self.plot(weights)
        pre = self.pre(weights)
        accuracy = self.Accuracy(pre)
        print("{}正确率为:".format(self.solver),accuracy)
        return weights,pre
    
weights1,pre1=Logistic(X, y, solver='BatchGradDesc').run()
weights2,pre2=Logistic(X, y, solver='StocGradDesc').run()
weights3,pre3=Logistic(X, y, solver='MiniBatchBGradDesc').run()

Output

BatchGradDesc accuracy: 98.95
StocGradDesc accuracy: 98.07
MiniBatchBGradDesc accuracy: 98.59
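As a rough cross-check (an addition here, not part of the original script), scikit-learn's LogisticRegression can be fit on the same standardized X and y and should give a comparable training accuracy:

from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(max_iter=1000)
clf.fit(X, y)
print("sklearn LogisticRegression accuracy:", round(clf.score(X, y) * 100, 2))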

Pros and Cons of the Gradient Descent Variants

1) Batch Gradient Descent (BGD):

   Uses the entire training set: the gradient direction is computed from all samples at every update.

   Pros: converges to the global optimum (the cross-entropy loss is convex); easy to parallelize.

   Cons: when there are many samples, each update is computationally expensive and slow.

2) Mini-Batch Gradient Descent (MBGD):

   Splits the data into batches and updates the parameters batch by batch, so each gradient direction is determined jointly by a small group of samples, which keeps the descent from drifting and reduces randomness.

   Pros: lower computational cost per update than BGD and less randomness than SGD.

3) Stochastic Gradient Descent (SGD):

   Computes the loss and gradient on a single sample at a time and updates the parameters immediately.

   Pros: very fast updates.

   Cons: noisy, less stable convergence.

Summary: SGD can be seen as a special case of MBGD, namely the case batch_size = 1.