Logistic Regression via Gradient Descent in Python: Predicting Admission from Exam Scores

Reference: https://www.cnblogs.com/hello-/articles/9737549.html

Test data: https://download.csdn.net/download/yuanshuaipeng/12423234

Problem statement: suppose you are the administrator of a university department and want to determine each applicant's chance of admission based on their results on two exams. You have historical data from previous applicants that can serve as a training set for logistic regression. For each training example, you have the applicant's scores on the two exams and the admission decision. The task is to build a classification model that estimates the probability of admission from the exam scores.

Import the three core modules: numpy, pandas, matplotlib.

import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Read the raw data:

# read the data (the file has no header row)
data = pd.read_csv("./LogiReg_data.txt", header=None,
                   names=["score1", "score2", "label"])
print(data.head(3))
print(data.shape)

Visualize the data:

def show_data(data):
    # split the samples by class label
    set_0 = data[data["label"] == 0]
    set_1 = data[data["label"] == 1]
    plt.figure(figsize=(10, 5))
    plt.scatter(set_0["score1"], set_0["score2"], c="r", marker="*", label="not admitted (0)")
    plt.scatter(set_1["score1"], set_1["score2"], c="b", marker="o", label="admitted (1)")
    plt.legend()
    plt.xlabel("score1")
    plt.ylabel("score2")
    plt.show()

Logistic regression implementation

Goal: build a classifier by solving for the three parameters θ0, θ1, θ2, where θ0 is the bias term and θ1, θ2 are the weights for the first and second exam scores.

Then set a threshold and decide admission according to whether the predicted probability exceeds it.
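
Concretely, the classifier implemented below estimates the admission probability as

$$h_\theta(x) = g(\theta_0 + \theta_1 x_1 + \theta_2 x_2), \qquad g(z) = \frac{1}{1+e^{-z}},$$

where x1 and x2 are the two exam scores; a sample is predicted as admitted when this probability exceeds the threshold (0.5 in the predict function of the full code).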

Components to implement:

sigmoid: maps a raw score to a probability

model: returns the predicted values

cost: computes the loss for the given parameters

gradient: computes the gradient for each parameter

descent: performs the parameter updates

1. The sigmoid function:

def sigmode(z):
    return 1 / (1+ np.exp(-z))
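
A quick sanity check of the function (not in the original post; outputs are approximate):

print(sigmode(np.array([-10.0, 0.0, 10.0])))   # -> [4.5e-05  0.5  0.99995]: large negative inputs go to 0, large positive to 1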

2. The prediction (hypothesis) function:

def model(X, theta):
    """
    X: sample matrix (including the leading column of 1s)
    theta: row vector [θ0, θ1, ..., θn]
    Returns the predicted probabilities.
    """
    return sigmode(np.dot(X, theta.T))
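
To make the shapes explicit, a minimal toy example with made-up numbers (two samples, a bias column plus the two scores):

X_toy = np.array([[1.0, 30.0, 40.0],
                  [1.0, 90.0, 85.0]])      # shape (2, 3): [1, score1, score2]
theta_toy = np.zeros(shape=(1, 3))         # row vector [θ0, θ1, θ2]
print(model(X_toy, theta_toy))             # shape (2, 1); all 0.5 because theta is still all zeros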

3. Raw data and parameter initialization

# add a column of 1s for the bias term
data.insert(0, "Ones", value=1)
print(data.loc[:3, :])

# set X and y
orig_data = data.values   # pandas' as_matrix() is deprecated/removed; use .values instead
# m: number of samples, n: number of features + 1 (the label column)
m, n = orig_data.shape
theta = np.zeros(shape=(1, n - 1))
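
With the Ones column inserted, orig_data has four columns (Ones, score1, score2, label), so n = 4 and theta starts as a 1x3 row vector of zeros. A quick check:

print(orig_data.shape)   # (m, 4)
print(theta.shape)       # (1, 3)
print(theta)             # [[0. 0. 0.]]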

4. The loss function

def cost(X, y, theta):
    """
    Returns the loss over the samples: the mean per-sample loss.
    """
    # note the difference between np.multiply (element-wise) and np.dot (matrix product)
    left = np.multiply(-y, np.log(model(X, theta)))
    right = np.multiply(1 - y, np.log(1 - model(X, theta)))
    return np.sum(left - right) / len(X)
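
In formula form, this is the average cross-entropy (negative log-likelihood) over the m samples that the code computes:

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[\,y^{(i)}\log h_\theta(x^{(i)}) + \big(1-y^{(i)}\big)\log\big(1-h_\theta(x^{(i)})\big)\Big],$$

where $h_\theta$ is the model function above and m = len(X).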

5. Computing the gradient:

def gradient(X, y, theta):
    """
    Computes the gradient; returns an array with the same shape as theta.
    """
    # grad is a 2-D array of shape 1*n
    grad = np.zeros(shape=theta.shape)
    # error has shape m*1
    error = y - model(X, theta)
    for j in range(grad.shape[1]):
        # fill grad (1*n) one component at a time
        grad[0, j] = - np.dot(error.T, X[:, j]) / X.shape[0]
    return grad
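
Written out per component, the code computes

$$\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)},$$

which matches the loop above since error = y - h carries the leading minus sign. The loop over j can also be collapsed into a single matrix product; a minimal vectorized sketch (equivalent for the shapes used here, not part of the original post):

def gradient_vectorized(X, y, theta):
    # (h - y) has shape m*1; X.T @ (h - y) has shape n*1; transpose back to theta's 1*n shape
    error = model(X, theta) - y
    return np.dot(X.T, error).T / X.shape[0]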

6. Implementing gradient descent:

6.1 Three stopping criteria for the iteration

1. number of iterations  2. change in the loss  3. norm of the gradient

# stopping criteria
STOP_ITER = 0  # by number of iterations
STOP_COST = 1  # by change in the loss
STOP_GRAD = 2  # by gradient norm

def stop_cretion(type, value, threshold):
    if type == STOP_ITER:   return value > threshold
    elif type == STOP_COST: return abs(value[-1] - value[-2]) < threshold
    elif type == STOP_GRAD: return np.linalg.norm(value) < threshold
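
What is passed as value depends on the criterion; this is how descent below calls it (hypothetical illustration):

stop_cretion(STOP_ITER, 5001, 5000)              # True once the iteration count exceeds the threshold
stop_cretion(STOP_COST, [0.6931, 0.6930], 1e-6)  # compares the last two recorded loss values
stop_cretion(STOP_GRAD, np.zeros((1, 3)), 0.05)  # compares the L2 norm of the current gradient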

6.2 Three gradient descent strategies: 1. full-batch gradient descent, 2. stochastic gradient descent, 3. mini-batch gradient descent

All three strategies are realized by the same routine through the batchsize argument, as the illustrative calls below show.
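
Concretely, the choice of batchsize selects the strategy (the run() function in section 7 labels its plots with exactly this mapping); illustrative calls, with hyperparameters that would still need tuning:

# batchsize = m          -> full-batch gradient descent
# run(orig_data, theta, m, STOP_ITER, 5000, 0.000001)
# batchsize = 1          -> stochastic gradient descent
# run(orig_data, theta, 1, STOP_ITER, 5000, 0.001)
# 1 < batchsize < m      -> mini-batch gradient descent, e.g.
# run(orig_data, theta, 16, STOP_ITER, 15000, 0.001)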

# shuffle the rows, then split into features X and labels y
def shuffle_data(data):
    np.random.shuffle(data)   # shuffles the rows in place
    X = data[:, :-1]
    y = data[:, -1:]
    return X, y

def descent(data, theta, batchsize, stoptype, threshold, alpha):

    i = 0   # iteration counter
    b = 0   # index of the current batch within the shuffled data
    init_time = time.time()

    # initialize the gradient
    grad = np.zeros(theta.shape)

    # shuffle the data first
    X, y = shuffle_data(data)

    # initial cost
    cost_value = [cost(X, y, theta)]

    while True:
        # compute the gradient on the current batch
        grad = gradient(X[b:b+batchsize, :], y[b:b+batchsize, :], theta)
        b += batchsize

        # after a full pass over the data, restart the batch index and reshuffle
        if b >= X.shape[0]:
            b = 0
            X, y = shuffle_data(data)

        # update the parameters along the negative gradient
        theta = theta - alpha * grad
        # record the loss
        ## note: the gradient may use only a subset of the samples, but the loss is always computed on all of them
        cost_value.append(cost(X, y, theta))
        i += 1

        if stoptype == STOP_ITER:   value = i
        elif stoptype == STOP_COST: value = cost_value
        elif stoptype == STOP_GRAD: value = grad

        # check whether the stopping criterion is met
        if stop_cretion(stoptype, value, threshold): break

    # return: theta, number of iterations, loss history, last gradient, elapsed time
    return theta, i - 1, cost_value, grad, time.time() - init_time
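
Each pass through the loop therefore applies the standard gradient descent update

$$\theta \leftarrow \theta - \alpha\,\nabla_\theta J(\theta),$$

with the gradient estimated on the current batch and the learning rate α passed in as alpha.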

7. Main routine and displaying the results:

def run(data, theta, batchSize, stopType, thresh, alpha):
    theta, iter, costs, grad, dur = descent(data, theta, batchSize, stopType, thresh, alpha)
    # build a descriptive plot title from the run configuration
    name = "Original" if (data[:, 1] > 2).sum() > 1 else "Scaled"
    name += " data - learning rate: {} - ".format(alpha)
    if batchSize == data.shape[0]:
        strDescType = "Gradient"
    elif batchSize == 1:
        strDescType = "Stochastic"
    else:
        strDescType = "Mini-batch ({})".format(batchSize)
    name += strDescType + " descent - Stop: "
    if stopType == STOP_ITER:
        strStop = "{} iterations".format(thresh)
    elif stopType == STOP_COST:
        strStop = "costs change < {}".format(thresh)
    else:
        strStop = "gradient norm < {}".format(thresh)
    name += strStop
    print("***{}\nTheta: {} - Iter: {} - Last cost: {:03.2f} - Duration: {:03.2f}s".format(
        name, theta, iter, costs[-1], dur))
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(np.arange(len(costs)), costs, 'r')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cost')
    ax.set_title(name.upper() + ' - Error vs. Iteration')
    plt.show()
    return theta

1. Full-batch gradient descent, stopping after 5000 iterations (alternative stopping criteria and the standardized-data run are shown below):

# stop after a fixed number of iterations:
# run(orig_data, theta, m, STOP_ITER, 5000, 0.000001)
# stop when the change in the loss becomes small:
# run(orig_data, theta, m, STOP_COST, thresh=0.000001, alpha=0.001)

# stop when the gradient norm becomes small:
# run(orig_data, theta, m, STOP_GRAD, thresh=0.05, alpha=0.001)

# experiment on standardized data: scale only the two score columns, not the Ones column
from sklearn.preprocessing import scale
orig_data[:, 1:(n-1)] = scale(orig_data[:, 1:(n-1)])
# run(orig_data, theta, m, STOP_ITER, 50000, 0.001)
theta = run(orig_data, theta, 16, STOP_ITER, 50000, 0.001)

Full code:

#coding=utf-8
import sys
import math
import random
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import scale

# stopping criteria
STOP_ITER = 0  # by number of iterations
STOP_COST = 1  # by change in the loss
STOP_GRAD = 2  # by gradient norm


def show_data(data):
    # split the samples by class label
    set_0 = data[data["label"] == 0]
    set_1 = data[data["label"] == 1]
    plt.figure(figsize=(10, 5))
    plt.scatter(set_0["score1"], set_0["score2"], c="r", marker="*", label="not admitted (0)")
    plt.scatter(set_1["score1"], set_1["score2"], c="b", marker="o", label="admitted (1)")
    plt.legend()
    plt.xlabel("score1")
    plt.ylabel("score2")
    plt.show()


def sigmode(z):
    return 1 / (1+ np.exp(-z))


def model(X, theta):
    """
    X: sample matrix (including the leading column of 1s)
    theta: row vector [θ0, θ1, ..., θn]
    Returns the predicted probabilities.
    """
    return sigmode(np.dot(X, theta.T))

def cost(X, y, theta):
    """
    Returns the loss over the samples: the mean per-sample loss.
    """
    # note the difference between np.multiply (element-wise) and np.dot (matrix product)
    left = np.multiply(-y, np.log(model(X, theta)))
    right = np.multiply(1 - y, np.log(1 - model(X, theta)))
    return np.sum(left - right) / len(X)

def gradient(X, y, theta):
    """
    Computes the gradient; returns an array with the same shape as theta.
    """
    # grad is a 2-D array of shape 1*n
    grad = np.zeros(shape=theta.shape)
    # error has shape m*1
    error = y - model(X, theta)
    for j in range(grad.shape[1]):
        # fill grad (1*n) one component at a time
        grad[0, j] = - np.dot(error.T, X[:, j]) / X.shape[0]
    return grad


def gradient2(X, y, theta):
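    # gradient2: an equivalent alternative to gradient(); it uses error = h - y directly
    # (so no leading minus sign) and sums element-wise products instead of using np.dot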
    grad = np.zeros(theta.shape)
    error = (model(X, theta) - y).ravel()
    for j in range(len(theta.ravel())):  # for each parmeter
        term = np.multiply(error, X[:, j])
        grad[0, j] = np.sum(term) / len(X)

    return grad



def stop_cretion(type, value, threshold):
    if type == STOP_ITER:   return value > threshold
    elif type == STOP_COST: return abs(value[-1] - value[-2]) < threshold
    elif type == STOP_GRAD: return np.linalg.norm(value) < threshold

# shuffle the rows, then split into features X and labels y
def shuffle_data(data):
    np.random.shuffle(data)   # shuffles the rows in place
    X = data[:, :-1]
    y = data[:, -1:]
    return X, y

def descent(data, theta, batchsize, stoptype, threshold, alpha):

    i = 0   # iteration counter
    b = 0   # index of the current batch within the shuffled data
    init_time = time.time()

    # initialize the gradient
    grad = np.zeros(theta.shape)

    # shuffle the data first
    X, y = shuffle_data(data)

    # initial cost
    cost_value = [cost(X, y, theta)]

    while True:
        # compute the gradient on the current batch
        grad = gradient(X[b:b+batchsize, :], y[b:b+batchsize, :], theta)
        b += batchsize

        # after a full pass over the data, restart the batch index and reshuffle
        if b >= X.shape[0]:
            b = 0
            X, y = shuffle_data(data)

        # update the parameters along the negative gradient
        theta = theta - alpha * grad
        # record the loss
        ## note: the gradient may use only a subset of the samples, but the loss is always computed on all of them
        cost_value.append(cost(X, y, theta))
        i += 1

        if stoptype == STOP_ITER:   value = i
        elif stoptype == STOP_COST: value = cost_value
        elif stoptype == STOP_GRAD: value = grad

        # check whether the stopping criterion is met
        if stop_cretion(stoptype, value, threshold): break

    # return: theta, number of iterations, loss history, last gradient, elapsed time
    return theta, i - 1, cost_value, grad, time.time() - init_time

def run(data, theta, batchSize, stopType, thresh, alpha):
    theta, iter, costs, grad, dur = descent(data, theta, batchSize, stopType, thresh, alpha)
    # build a descriptive plot title from the run configuration
    name = "Original" if (data[:, 1] > 2).sum() > 1 else "Scaled"
    name += " data - learning rate: {} - ".format(alpha)
    if batchSize == data.shape[0]:
        strDescType = "Gradient"
    elif batchSize == 1:
        strDescType = "Stochastic"
    else:
        strDescType = "Mini-batch ({})".format(batchSize)
    name += strDescType + " descent - Stop: "
    if stopType == STOP_ITER:
        strStop = "{} iterations".format(thresh)
    elif stopType == STOP_COST:
        strStop = "costs change < {}".format(thresh)
    else:
        strStop = "gradient norm < {}".format(thresh)
    name += strStop
    print("***{}\nTheta: {} - Iter: {} - Last cost: {:03.2f} - Duration: {:03.2f}s".format(
        name, theta, iter, costs[-1], dur))
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(np.arange(len(costs)), costs, 'r')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cost')
    ax.set_title(name.upper() + ' - Error vs. Iteration')
    plt.show()
    return theta

def predict(X, theta):
    return [1 if x > 0.5 else 0 for x in model(X, theta)]

def accuracy(y, predict_value):
    _y = y.ravel()
    correct = [1 if _y[i] == predict_value[i] else 0
               for i in range(len(_y))]
    return np.sum(correct) / len(_y)



if __name__ == '__main__':
    # read the data
    data = pd.read_csv("./LogiReg_data.txt", header=None, names=["score1", "score2", "label"])
    print(data.head(3))
    print(data.shape)
    # show_data(data)

    # add a column of 1s for the bias term
    data.insert(0, "Ones", value=1)
    print(data.loc[:3, :])

    # set X and y
    orig_data = data.values   # pandas' as_matrix() is deprecated/removed; use .values instead
    # m: number of samples, n: number of features + 1 (the label column)
    m, n = orig_data.shape
    theta = np.zeros(shape=(1, n - 1))
    # full-batch gradient descent
    # stop after a fixed number of iterations:
    # run(orig_data, theta, m, STOP_ITER, 5000, 0.000001)

    # stop when the change in the loss becomes small:
    # run(orig_data, theta, m, STOP_COST, thresh=0.000001, alpha=0.001)

    # stop when the gradient norm becomes small:
    # run(orig_data, theta, m, STOP_GRAD, thresh=0.05, alpha=0.001)

    # standardize the two score columns only (leave the Ones column untouched)
    orig_data[:, 1:(n-1)] = scale(orig_data[:, 1:(n-1)])
    # run(orig_data, theta, m, STOP_ITER, 50000, 0.001)
    theta = run(orig_data, theta, 16, STOP_ITER, 50000, 0.001)

    # evaluate accuracy on the (scaled) training data
    print("Accuracy:", accuracy(orig_data[:, -1], predict(orig_data[:, :(n-1)], theta)))