逻辑回归梯度下降方法Python实现-依据成绩进行类别预测

最新推荐文章于 2023-07-26 16:09:24 发布

yuanshuaipeng

最新推荐文章于 2023-07-26 16:09:24 发布

阅读量1.5k

点赞数 6

分类专栏：机器学习文章标签：机器学习逻辑回归 python

本文链接：https://blog.csdn.net/yuanshuaipeng/article/details/106141084

版权

机器学习专栏收录该内容

9 篇文章 1 订阅

订阅专栏

参考： https://www.cnblogs.com/hello-/articles/9737549.html

测试数据地址：https://download.csdn.net/download/yuanshuaipeng/12423234

案例说明：假设你是一个大学系的管理员，你想根据两次考试的结果来决定每个申请人的录取机会。你有以前的申请人的历史数据，你可以用它作为逻辑回归的训练集。对于每一个培训例子，你有两个考试的申请人的分数和录取决定。为了做到这一点，我们将建立一个分类模型，根据考试成绩估计入学概率。

导入三大模块：numpy pandas matplotlib

import  time
import numpy as np
import pandas as pd
import  matplotlib.pyplot as plt

读取原始数据：

# 读取数据
    data = pd.read_csv("./LogiReg_data.txt", header=None, 
                           names=["score1", "score2", "label"])
    print(data.head(3))
    print(data.shape)

可视化展示：

def show_data(data):
    set_0 = data[data["label"]==0]
    set_1 = data[data["label"]==1]
    plt.figure(figsize=(10,5))
    plt.scatter(set_0["score1"], set_0["score2"], c="r", marker="*")
    plt.scatter(set_1["score1"], set_1["score2"], c="b", marker="o")
    plt.legend()
    plt.xlabel("score1")
    plt.ylabel("score2")
    plt.show()

逻辑回归实现

目标：建立分类器（求解出三个参数 θ0θ1θ2）,θ0偏置项θ1第一次测试的成绩θ2第二次测试的成绩

设定阈值，根据阈值判断录取结果

完成模块：

sigmoid : 映射到概率的函数

model : 返回预测结果值

cost : 根据参数计算损失

gradient : 计算每个参数的梯度方向

descent : 进行参数更新

1.sigmoid函数：

def sigmode(z):
    return 1 / (1+ np.exp(-z))

2、预测函数：

def model(X, theta):
    """
    X：样本矩阵 （包括第一列的1）
    theta : 行向量 [θ0， θ1, ....θn]
    返回预测的结果值
    :return:
    """
    return sigmode(np.dot(X, theta.T))

3、原始数据及参数初始化

# 数据增加1列，
data.insert(0,"Ones",value=1)
print(data.loc[:3,:])

# set X and y
orig_data =data.as_matrix()
# m 样本的数量， n 特征数量+1（label）
m,n = orig_data.shape
theta = np.zeros(shape=(1, n - 1))

4、损失函数

def cost(X, y, theta):
    """
    获取样本的损失值，为所有样本的损失均值
    """
    # 注意，multipy和 np.dot的区别
    left = np.multiply(-y, np.log(model(X,theta)))
    right = np.multiply(1-y, np.log(1 - model(X,theta)))
    return np.sum(left-right) / len(X)

5、计算梯度：

def gradient(X,y,theta):
    """
    计算梯度，返回一个和theta 一样的数组
    :return:
    """
    #二维数组，1*n
    grad = np.zeros(shape=theta.shape)
    # error n*1
    error = y- model(X,theta)
    for j in range(grad.shape[1]):
        # grad 1*n ,
        grad[0, j] = - np.dot(error.T, X[:,j]) / X.shape[0]
    return grad

6、实现梯度下降：

6.1 设置三种迭代终止策略

1、迭代次数 2、损失差值 3、梯度差值

# 停止准则
STOP_ITER = 0 #  迭代数量
STOP_COST = 1 #损失函数
STOP_GRAD = 2 # 梯度值

def stop_cretion(type, value, threshold):
    if type == STOP_ITER:   return value > threshold
    elif type == STOP_COST: return  abs(value[-1]-value[-2]) < threshold
    elif type == STOP_GRAD: return np.linalg.norm(value) < threshold

6.2 三种梯度下降策略： 1 整批量梯度下降、2、随机梯度下降 3 小批量梯度下降

根据batchsize 来实现三种梯度下降

# 重排数据
def shuffle_data(data):
    np.random.shuffle(data)
    X= data[:,:-1]
    y = data[:,-1:]
    return X,y

def descent(data, theta, batchsize, stoptype, threshold, alpha):

    i=0  #迭代次数
    b =0 #批量位置索引
    init_time = time.time()

    # 初始化梯度：
    grad = np.zeros(theta.shape)

    #首先对数据清洗
    X, y = shuffle_data(data)

    # 初始化cost
    cost_value = [cost(X,y,theta)]

    while True:
        # 先求梯度：
        grad = gradient(X[b:b+batchsize,:], y[b:b+batchsize,:], theta)
        b += batchsize

        #执行完一次后对数据重新洗牌
        if b>= X.shape[0]:
            b=0
            X,y = shuffle_data(data)

        # 根据梯度更新参数
        theta = theta - alpha * grad
        # 损失值
        ## 注意：尽管求梯度时不一定用到所有样本，但求损失值时，用的是所有样本
        cost_value.append(cost(X,y,theta))
        i+=1

        if stoptype == STOP_ITER: value=i
        elif stoptype == STOP_COST: value= cost_value
        elif stoptype == STOP_GRAD: value= grad

        #判断是否
        if stop_cretion(stoptype, value, threshold): break
        # 返回： theta ,迭代次数，损失值，当前梯度， 运行时间
    return theta, i-1, cost_value, grad, time.time() - init_time

7、主函数及结果展示：

def run(data, theta, batchSize, stopType, thresh, alpha):
    theta, iter, costs, grad, dur = descent(data, theta, batchSize, stopType, thresh, alpha)
    # 根据结果绘制图形
    name = "Original" if (data[:, 1] > 2).sum() > 1 else "Scaled"
    name += " data - learning rate: {} - ".format(alpha)
    if batchSize == data.shape[0]:
        strDescType = "Gradient"
    elif batchSize == 1:
        strDescType = "Stochastic"
    else:
        strDescType = "Mini-batch ({})".format(batchSize)
    name += strDescType + " descent - Stop: "
    if stopType == STOP_ITER:
        strStop = "{} iterations".format(thresh)
    elif stopType == STOP_COST:
        strStop = "costs change < {}".format(thresh)
    else:
        strStop = "gradient norm < {}".format(thresh)
    name += strStop
    print("***{}\nTheta: {} - Iter: {} - Last cost: {:03.2f} - Duration: {:03.2f}s".format(
        name, theta, iter, costs[-1], dur))
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(np.arange(len(costs)), costs, 'r')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cost')
    ax.set_title(name.upper() + ' - Error vs. Iteration')
    plt.show()
    return theta

1、基于批量梯度下降，终止准则5000次迭代

# 按迭代次数为终止条件：
# run(orig_data, theta, m, STOP_ITER, 5000, 0.000001)

#按损失值的差值
# run(orig_data, theta, m, STOP_COST, thresh=0.000001, alpha=0.001)

# 根据梯度变化停止
# run(orig_data, theta, m, STOP_GRAD, thresh=0.05, alpha=0.001)

#标准化后的实验结果
orig_data[:,:(n-1)] = scale(orig_data[:,:(n-1)])
# run(orig_data, theta, m, STOP_ITER, 50000, 0.001)
theta = run(orig_data, theta, 16, STOP_ITER, 50000, 0.001)

全部代码：

#coding=utf-8
import sys
import math
import random
import  time
import numpy as np
import pandas as pd
import  matplotlib.pyplot as plt

from sklearn.preprocessing import scale

# 停止准则
STOP_ITER = 0 #  迭代数量
STOP_COST = 1 #损失函数
STOP_GRAD = 2 # 梯度值


def show_data(data):
    set_0 = data[data["label"]==0]
    set_1 = data[data["label"]==1]
    plt.figure(figsize=(10,5))
    plt.scatter(set_0["score1"], set_0["score2"], c="r", marker="*")
    plt.scatter(set_1["score1"], set_1["score2"], c="b", marker="o")

    plt.xlabel("score1")
    plt.ylabel("score2")
    plt.show()


def sigmode(z):
    return 1 / (1+ np.exp(-z))


def model(X, theta):
    """
    X：样本矩阵 （包括第一列的1）
    theta : 行向量 [θ0， θ1, ....θn]
    返回预测的结果值
    :return:
    """
    return sigmode(np.dot(X, theta.T))

def cost(X, y, theta):
    """
    获取样本的损失值，为所有样本的损失均值
    """
    # 注意，multipy和 np.dot的区别
    left = np.multiply(-y, np.log(model(X,theta)))
    right = np.multiply(1-y, np.log(1 - model(X,theta)))
    return np.sum(left-right) / len(X)

def gradient(X,y,theta):
    """
    计算梯度，返回一个和theta 一样的数组
    :return:
    """
    #二维数组，1*n
    grad = np.zeros(shape=theta.shape)
    # error n*1
    error = y- model(X,theta)
    for j in range(grad.shape[1]):
        # grad 1*n ,
        grad[0, j] = - np.dot(error.T, X[:,j]) / X.shape[0]
    return grad


def gradient2(X, y, theta):
    grad = np.zeros(theta.shape)
    error = (model(X, theta) - y).ravel()
    for j in range(len(theta.ravel())):  # for each parmeter
        term = np.multiply(error, X[:, j])
        grad[0, j] = np.sum(term) / len(X)

    return grad



def stop_cretion(type, value, threshold):
    if type == STOP_ITER:   return value > threshold
    elif type == STOP_COST: return  abs(value[-1]-value[-2]) < threshold
    #
    elif type == STOP_GRAD: return np.linalg.norm(value) < threshold

# 重排数据
def shuffle_data(data):
    np.random.shuffle(data)
    X= data[:,:-1]
    y = data[:,-1:]
    return X,y

def descent(data, theta, batchsize, stoptype, threshold, alpha):

    i=0  #迭代次数
    b =0 #批量位置索引
    init_time = time.time()

    # 初始化梯度：
    grad = np.zeros(theta.shape)

    #首先对数据清洗
    X, y = shuffle_data(data)

    # 初始化cost
    cost_value = [cost(X,y,theta)]

    while True:
        # 先求梯度：
        grad = gradient(X[b:b+batchsize,:], y[b:b+batchsize,:], theta)
        b += batchsize

        #执行完一次后对数据重新洗牌
        if b>= X.shape[0]:
            b=0
            X,y = shuffle_data(data)

        # 根据梯度更新参数
        theta = theta - alpha * grad
        # 损失值
        ## 注意：尽管求梯度时不一定用到所有样本，但求损失值时，用的是所有样本
        cost_value.append(cost(X,y,theta))
        i+=1

        if stoptype == STOP_ITER: value=i
        elif stoptype == STOP_COST: value= cost_value
        elif stoptype == STOP_GRAD: value= grad

        #判断是否
        if stop_cretion(stoptype, value, threshold): break
        # 返回： theta ,迭代次数，损失值，当前梯度， 运行时间
    return theta, i-1, cost_value, grad, time.time() - init_time

def run(data, theta, batchSize, stopType, thresh, alpha):
    theta, iter, costs, grad, dur = descent(data, theta, batchSize, stopType, thresh, alpha)
    # 根据结果绘制图形
    name = "Original" if (data[:, 1] > 2).sum() > 1 else "Scaled"
    name += " data - learning rate: {} - ".format(alpha)
    if batchSize == data.shape[0]:
        strDescType = "Gradient"
    elif batchSize == 1:
        strDescType = "Stochastic"
    else:
        strDescType = "Mini-batch ({})".format(batchSize)
    name += strDescType + " descent - Stop: "
    if stopType == STOP_ITER:
        strStop = "{} iterations".format(thresh)
    elif stopType == STOP_COST:
        strStop = "costs change < {}".format(thresh)
    else:
        strStop = "gradient norm < {}".format(thresh)
    name += strStop
    print("***{}\nTheta: {} - Iter: {} - Last cost: {:03.2f} - Duration: {:03.2f}s".format(
        name, theta, iter, costs[-1], dur))
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(np.arange(len(costs)), costs, 'r')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cost')
    ax.set_title(name.upper() + ' - Error vs. Iteration')
    plt.show()
    return theta

def predict(X, theta):
    return [1 if x> 0.5 else 0 for x in model(X,theta)]

def acuuracy(y,predict_value):
    _y =y.ravel()
    correct = [1 if _y[i] == predict_value[i] else 0
               for i in range(len(_y))]
    return np.sum(correct) / len(_y)



if __name__ == '__main__':
    # 读取数据
    data = pd.read_csv("./LogiReg_data.txt", header=None, names=["score1", "score2", "label"])
    print(data.head(3))
    print(data.shape)
    # show_data(data)

    # 数据增加1列，
    data.insert(0,"Ones",value=1)
    print(data.loc[:3,:])

    # set X and y
    orig_data =data.as_matrix()
    # m 样本的数量， n 特征数量+1（label）
    m,n = orig_data.shape
    theta = np.zeros(shape=(1, n - 1))
    #批量梯度下降
    # 按迭代次数为终止条件：
    # run(orig_data, theta, m, STOP_ITER, 5000, 0.000001)

    #按损失值的差值
    # run(orig_data, theta, m, STOP_COST, thresh=0.000001, alpha=0.001)

    # 根据梯度变化停止
    # run(orig_data, theta, m, STOP_GRAD, thresh=0.05, alpha=0.001)

    #标准化
    orig_data[:,:(n-1)] = scale(orig_data[:,:(n-1)])
    # run(orig_data, theta, m, STOP_ITER, 50000, 0.001)
    theta = run(orig_data, theta, 16, STOP_ITER, 50000, 0.001)

    #计算准确率
    print("准确率为：", acuuracy(orig_data[:,-1], predict(orig_data[:,:(n-1)], theta)))

yuanshuaipeng

关注

6
点赞
踩
20

收藏

觉得还不错? 一键收藏
0
评论
逻辑回归梯度下降方法Python实现-依据成绩进行类别预测

参考：https://www.cnblogs.com/hello-/articles/9737549.html测试数据地址：https://download.csdn.net/download/yuanshuaipeng/12423234案例说明：假设你是一个大学系的管理员，你想根据两次考试的结果来决定每个申请人的录取机会。你有以前的申请人的历史数据，你可以用它作为逻辑回归的训练集。对于每一个培训例子，你有两个考试的申请人的分数和录取决定。为了做到这一点，我们将建立一个分类模型，根据考试成绩估计入.
复制链接

扫一扫

专栏目录