A Python implementation of logistic regression, trained with batch gradient descent, stochastic gradient descent, mini-batch gradient descent, and Newton's method.
Batch Gradient Descent
from collections import OrderedDict
import random

import numpy as np
import matplotlib.pyplot as plt

def read_train_data():
    # Each line of 数据.txt has the form "x1,x2,label".
    x = []
    y = []
    with open("数据.txt", 'r') as f:
        for line in f:
            data_list = line.strip('\n').split(',')
            # Prepend the intercept term x0 = 1.0 to every sample.
            x.append([1.0, float(data_list[0]), float(data_list[1])])
            y.append(float(data_list[-1]))
    x = np.asarray(x, dtype=np.double)
    y = np.asarray(y, dtype=np.double)
    return x, y

x, y = read_train_data()
theta = np.zeros(x.shape[1])
# Plot the training data.
def plot_train_data(x, y):
    plt.figure()
    for i in range(x.shape[0]):
        if y[i] == 1:
            plt.scatter(x[i][1], x[i][2], c='none', marker='o', edgecolors='g', label='y=1')
        else:
            plt.scatter(x[i][1], x[i][2], c='r', marker='x', label='y=0')
    plt.xlabel('x1')
    plt.ylabel('x2')
    # Deduplicate legend entries (every scatter call adds one).
    handles, labels = plt.gca().get_legend_handles_labels()
    unique_label = OrderedDict(zip(labels, handles))
    plt.legend(unique_label.values(), unique_label.keys())
    plt.savefig("./plot_train_data.jpg")
    plt.show()

plot_train_data(x, y)
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def log_likelihood(theta, x, y):
    l_theta = 0.0
    for i in range(x.shape[0]):
        h = sigmoid(np.inner(theta, x[i]))
        l_theta += y[i] * np.log(h) + (1 - y[i]) * np.log(1 - h)
    return l_theta

def cost_function(theta, x, y):
    # Average negative log-likelihood over the training set.
    L = log_likelihood(theta, x, y)
    J = -L / x.shape[0]
    return J

def gradient(theta, x, y):
    # Gradient of the negative log-likelihood for a single sample (x, y).
    return -(y - sigmoid(np.inner(theta, x))) * x
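For reference, cost_function and gradient implement the standard cross-entropy cost and its per-sample derivative:

J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y_i \log \sigma(\theta^\top x_i) + (1 - y_i)\log\big(1 - \sigma(\theta^\top x_i)\big)\Big],
\qquad
\nabla_\theta J_i(\theta) = -\big(y_i - \sigma(\theta^\top x_i)\big)\, x_i.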
def batch_gradient_descent(x, y, theta, alpha, num_iterations):
    cost_history = []
    for i in range(num_iterations):
        # Sum the per-sample gradients over the whole training set.
        gradient_sum = np.zeros(x.shape[1])
        for j in range(x.shape[0]):
            gradient_sum += gradient(theta, x[j], y[j])
        theta -= alpha * gradient_sum / x.shape[0]
        cost = cost_function(theta, x, y)
        cost_history.append(cost)
    return theta, cost_history

# Train the logistic regression model.
alpha = 0.1
num_iterations = 50000
theta, cost_history = batch_gradient_descent(x, y, theta, alpha, num_iterations)
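As a quick sanity check (not in the original script), the returned cost_history can be plotted; the curve should decrease and flatten once batch gradient descent has converged:

# Plot the cost curve to verify convergence.
plt.figure()
plt.plot(cost_history)
plt.xlabel('iteration')
plt.ylabel('cost J(theta)')
plt.savefig('./bgd_cost_history.jpg')
plt.show()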
# Plot the decision boundary and the data points.
plt.figure()
for i in range(x.shape[0]):
    if y[i] == 1:
        plt.scatter(x[i][1], x[i][2], c='none', marker='o', edgecolors='g', label='y=1')
    else:
        plt.scatter(x[i][1], x[i][2], c='r', marker='x', label='y=0')
# The boundary is theta0 + theta1*x1 + theta2*x2 = 0, solved here for x2.
x_axis = np.linspace(np.min(x[:, 1]), np.max(x[:, 1]), 100)
y_axis = -(theta[0] + theta[1] * x_axis) / theta[2]
plt.plot(x_axis, y_axis, c='black')
plt.xlabel('x1')
plt.ylabel('x2')
plt.title(str(num_iterations) + ' iteration(s)')
plt.savefig('./logistic_regression_decision_boundary.jpg')
plt.show()
Stochastic Gradient Descent
# Stochastic gradient descent: update on one randomly chosen sample per step.
def stochastic_gradient_descent(theta, x, y, alpha, num_iterations):
    cost_history = []
    for i in range(num_iterations):
        index = random.randint(0, x.shape[0] - 1)
        grad = gradient(theta, x[index], y[index])
        theta -= alpha * grad
        cost = cost_function(theta, x, y)
        cost_history.append(cost)
    return theta, cost_history

theta, cost_history = stochastic_gradient_descent(theta, x, y, alpha, num_iterations)
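With a fixed step size, SGD keeps bouncing around the optimum instead of settling. A common remedy, sketched here as a variant rather than part of the original code (the decay rate 1e-4 is an arbitrary choice), is to shrink alpha over time:

# Sketch: SGD with a 1/t step-size decay.
def sgd_with_decay(theta, x, y, alpha0, num_iterations, decay=1e-4):
    for t in range(num_iterations):
        index = random.randint(0, x.shape[0] - 1)
        alpha_t = alpha0 / (1.0 + decay * t)  # step size shrinks as t grows
        theta -= alpha_t * gradient(theta, x[index], y[index])
    return theta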
Mini-Batch Gradient Descent
# Gradient of a single randomly chosen sample.
i = random.randint(0, x.shape[0] - 1)
single_grad = gradient(theta, x[i], y[i])
# Mini-batch: accumulate the gradients of 10 randomly chosen samples.
index = random.sample(range(x.shape[0]), 10)
batch_grad = np.zeros(x.shape[1])
for i in index:
    batch_grad += gradient(theta, x[i], y[i])
def mini_batch_gradient_descent(x, y, alpha=0.1, num_iterations=200000, batch_size=10):
    theta = np.zeros(x.shape[1])
    m = x.shape[0]
    for i in range(num_iterations):
        # Randomly choose one batch of samples (without replacement).
        batch_indices = np.random.choice(m, batch_size, replace=False)
        batch_x = x[batch_indices]
        batch_y = y[batch_indices]
        batch_grad = np.zeros(x.shape[1])
        # Sum the gradients over the batch.
        for j in range(batch_size):
            batch_grad += gradient(theta, batch_x[j], batch_y[j])
        # Update the model parameters theta with the averaged batch gradient.
        theta -= alpha * batch_grad / batch_size
    return theta

theta = mini_batch_gradient_descent(x, y, alpha=0.1, num_iterations=200000, batch_size=10)
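The inner per-sample loop can also be written as one matrix expression. A minimal vectorized sketch that computes the same batch gradient (the sum of -(y_i - sigmoid(theta·x_i)) * x_i over the batch):

def batch_gradient_vectorized(theta, batch_x, batch_y):
    # batch_x.T @ (sigmoid(batch_x @ theta) - batch_y) sums the per-sample gradients.
    error = sigmoid(batch_x @ theta) - batch_y
    return batch_x.T @ error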
Newton's Method
# Logistic regression via Newton's method.
def grad_matrix(theta, x, y):
    y = np.squeeze(y)
    error = sigmoid(np.inner(theta, x)) - y
    G = np.dot(x.T, error)
    # Return the gradient as a column vector.
    return G.reshape(theta.shape[0], 1)

# Hessian matrix.
def hessian_matrix(theta, x):
    h = []
    # Each row of h is sigma_i * (1 - sigma_i) * x_i.
    for i in range(x.shape[0]):
        s = sigmoid(np.inner(theta, x[i, :]))
        h += [s * (1 - s) * x[i, :]]
    H = np.dot(x.T, np.asarray(h))
    return H
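In matrix form, grad_matrix and hessian_matrix compute the gradient and Hessian of the (unaveraged) negative log-likelihood, and each Newton step solves a linear system:

G = X^\top\big(\sigma(X\theta) - y\big), \qquad
H = X^\top S X, \quad S = \operatorname{diag}\big(\sigma_i (1 - \sigma_i)\big), \qquad
\theta \leftarrow \theta - H^{-1} G.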
def logistic_regression_newton(X, y, theta, max_iter=8, eps=1e-6):
    for i in range(max_iter):
        # Compute the gradient vector and the Hessian matrix.
        G = grad_matrix(theta, X, y)
        H = hessian_matrix(theta, X)
        # Solve H * delta = G rather than inverting H explicitly.
        delta = np.linalg.solve(H, G)
        theta_old = theta.copy()
        theta -= delta.reshape(-1)  # update the parameter vector
        # Stop once the cost change between iterations is below the tolerance.
        if np.abs(cost_function(theta, X, y) - cost_function(theta_old, X, y)) < eps:
            break
    return theta

theta = logistic_regression_newton(x, y, theta)
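Finally, a quick way to check any of the fitted models (not in the original script; it assumes the usual 0.5 decision threshold) is the training accuracy:

# Classify with a 0.5 threshold and report accuracy on the training set.
predictions = (sigmoid(x @ theta) >= 0.5).astype(float)
accuracy = np.mean(predictions == y)
print('training accuracy: {:.2%}'.format(accuracy))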