小批量梯度下降算法步骤_三种梯度下降算法——代码实现

最新推荐文章于 2024-07-23 13:46:07 发布

当下的幸福

最新推荐文章于 2024-07-23 13:46:07 发布

阅读量1.6k

点赞数

文章标签：小批量梯度下降算法步骤

本文链接：https://blog.csdn.net/weixin_34318945/article/details/112449503

版权

首先导入相关模块，完成随机种子等基础设置，并指定生成图片的保存路径。

1.导入模块

import numpy as np
#导入操作系统
import os
#画图
%matplotlib inline
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings(action="ignore",message="^internal gelsd")
# 随机种子（固定后每次运行的结果可复现）
np.random.seed (42)

2.图片保存地址

# Figures are written to <PROJECT_ROOT_DIR>/images/<MODEL_ID>/<fig_id>.png
PROJECT_ROOT_DIR = "."
# NOTE(review): "linear_nodels" looks like a typo for "linear_models"; it is
# left unchanged because it determines the output directory on disk.
MODEL_ID = "linear_nodels"


def save_fig(fig_id, tight_layout=True):
    """Save the current matplotlib figure as a 300-dpi PNG.

    Args:
        fig_id: File name of the figure, without the ".png" extension.
        tight_layout: When true, call ``plt.tight_layout()`` before saving.
    """
    directory = os.path.join(PROJECT_ROOT_DIR, "images", MODEL_ID)
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(directory, exist_ok=True)
    path = os.path.join(directory, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format="png", dpi=300)

3.创建数据

# 100 feature values drawn uniformly from [0, 2).
X = 2 * np.random.rand(100, 1)
# Targets follow y = 4 + 3x plus standard-normal noise.
y = 4 + 3 * X + np.random.randn(100, 1)

# Scatter plot of the generated samples, saved to disk and then displayed.
data_ax = plt.gca()
data_ax.plot(X, y, "b.")
data_ax.set_xlabel("$x_1$", fontsize=18)
data_ax.set_ylabel("$y$", rotation=0, fontsize=18)
save_fig("generated_data_plot")
plt.show()

4.预测

# Prepend a bias column of ones (x0 = 1) to every training sample.
# The row count is taken from X instead of being hard-coded.
X_b = np.c_[np.ones((len(X), 1)), X]

# Two probe points, x = 0 and x = 2, with the same bias column added.
X_new = np.array([[0], [2]])
X_new_b = np.c_[np.ones((len(X_new), 1)), X_new]

# Reference solution from scikit-learn's linear regression.
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
# The raw X is passed here, not X_b: LinearRegression fits an intercept
# itself by default.
lin_reg.fit(X, y)
lin_reg.intercept_, lin_reg.coef_  # notebook display: intercept and slope
# Predictions at the two probe points.
lin_reg.predict(X_new)

然后分别使用三种梯度下降算法

1.批量梯度下降

eta = 0.1  # learning rate
n_iterations = 1000  # number of full-batch updates
m = len(X_b)  # number of training samples (taken from the data)
theta = np.random.randn(2, 1)  # random initial parameters
for iteration in range(n_iterations):
    # Gradient of the MSE cost (1/m) * ||X_b.theta - y||^2 with respect to
    # theta. The 2/m factor matches the gradient used by
    # plot_gradient_descent and by the mini-batch loop in this file.
    gradients = 2 / m * X_b.T.dot(X_b.dot(theta) - y)
    theta = theta - eta * gradients  # step against the gradient
# Collects the parameter path when passed to plot_gradient_descent as
# theta_path.
theta_path_bgd = []
def plot_gradient_descent(theta, eta, theta_path=None, n_iterations=1000):
    """Run batch gradient descent from ``theta`` and plot the first ten fits.

    Reads the module-level ``X``, ``y``, ``X_b``, ``X_new`` and ``X_new_b``
    and draws on the current matplotlib axes.

    Args:
        theta: Initial parameter vector. It is rebound, never modified in
            place, so the caller's array is left untouched.
        eta: Learning rate; also shown in the subplot title.
        theta_path: Optional list. When given, the parameter vector obtained
            after every update is appended to it.
        n_iterations: Number of gradient steps to run (default 1000).
    """
    m = len(X_b)
    plt.plot(X, y, "b.")
    for iteration in range(n_iterations):
        if iteration < 10:
            # Only the first ten models are drawn.
            y_predict = X_new_b.dot(theta)
            plt.plot(X_new, y_predict, "r-")
        gradients = 2 / m * X_b.T.dot(X_b.dot(theta) - y)
        theta = theta - eta * gradients
        if theta_path is not None:
            theta_path.append(theta)
    # Decorate the axes once, after the loop, instead of on every iteration.
    plt.xlabel("$x_1$", fontsize=18)
    plt.axis([0, 2, 0, 15])
    # The backslash is required for mathtext to render the Greek letter eta.
    plt.title(r"$\eta = {}$".format(eta), fontsize=16)
# All three panels start from the same randomly drawn parameters.
np.random.seed(42)
theta = np.random.randn(2, 1)

plt.figure(figsize=(10, 4))

# Left panel: eta = 0.02. Only this panel carries the y-axis label.
plt.subplot(131)
plot_gradient_descent(theta, eta=0.02)
plt.ylabel("$y$", rotation=0, fontsize=18)

# Middle panel: eta = 0.1; its parameter path is recorded in theta_path_bgd.
plt.subplot(132)
plot_gradient_descent(theta, eta=0.1, theta_path=theta_path_bgd)

# Right panel: eta = 0.5.
plt.subplot(133)
plot_gradient_descent(theta, eta=0.5)

save_fig("gradient_descent_plot")
plt.show()

2.随机梯度下降求解线性回归（Stochastic Gradient Descent）

# Stochastic gradient descent: one randomly picked sample per update.
theta_path_sgd = []
m = len(X_b)
np.random.seed(42)
n_epochs = 50
eta = 0.1  # constant learning rate for every step
style = "r-"
# NOTE(review): this cell draws the start point with rand (uniform), while
# the other cells use randn - confirm whether that is intended.
theta = np.random.rand(2, 1)
for epoch in range(n_epochs):
    for i in range(m):
        if epoch == 0 and i < 20:
            # Draw the model for the first 20 steps of the first epoch.
            y_predict = X_new_b.dot(theta)
            plt.plot(X_new, y_predict, style)
        random_index = np.random.randint(m)
        # A length-1 slice keeps xi and yi two-dimensional.
        row = slice(random_index, random_index + 1)
        xi = X_b[row]
        yi = y[row]
        residual = xi.dot(theta) - yi
        gradients = 2 * xi.T.dot(residual)
        # Rebind rather than update in place, so the vectors already stored
        # in theta_path_sgd stay unchanged.
        theta = theta - eta * gradients
        theta_path_sgd.append(theta)
plt.plot(X, y, "b.")
plt.xlabel("$x_1$", fontsize=18)
plt.ylabel("$y$", rotation=0, fontsize=18)
plt.axis([0, 2, 0, 15])
save_fig("generated_SGD_plot")
plt.show()

3.小批量梯度下降求解线性回归（Mini-batch gradient descent）

# Mini-batch gradient descent: each update uses a small shuffled batch.
theta_path_mgd = []
n_iterations = 50  # used as the number of epochs (passes over the data)
minibatch_size = 20
eta = 0.1  # constant learning rate, set once instead of on every step
m = len(X_b)  # re-derived here so the cell does not depend on an earlier one
np.random.seed(42)
theta = np.random.randn(2, 1)
for epoch in range(n_iterations):
    # Reshuffle the samples at the start of every epoch.
    shuffled_indices = np.random.permutation(m)
    X_b_shuffled = X_b[shuffled_indices]
    y_shuffled = y[shuffled_indices]
    for i in range(0, m, minibatch_size):
        xi = X_b_shuffled[i:i + minibatch_size]
        yi = y_shuffled[i:i + minibatch_size]
        # Divide by the rows actually in this batch: the last batch is
        # shorter when m is not a multiple of minibatch_size.
        gradients = 2 / len(xi) * xi.T.dot(xi.dot(theta) - yi)
        # Rebind rather than update in place, so the vectors already stored
        # in theta_path_mgd stay unchanged.
        theta = theta - eta * gradients
        theta_path_mgd.append(theta)
# Stack each list of (2, 1) parameter vectors into a single array so the two
# components can be sliced out with [:, 0] and [:, 1].
theta_path_bgd = np.array(theta_path_bgd)
theta_path_sgd = np.array(theta_path_sgd)
theta_path_mgd = np.array(theta_path_mgd)

# Parameter-space trajectories of the three algorithms on one set of axes.
plt.figure(figsize=(7, 4))
plt.plot(theta_path_sgd[:, 0], theta_path_sgd[:, 1], "r-o", linewidth=1, label="Stochastic")
plt.plot(theta_path_mgd[:, 0], theta_path_mgd[:, 1], "g-s", linewidth=2, label="Mini-batch")
plt.plot(theta_path_bgd[:, 0], theta_path_bgd[:, 1], "b-+", linewidth=3, label="Batch")
plt.legend(loc="upper left", fontsize=16)
# The backslash is required for mathtext to render the Greek letter theta;
# without it the label is typeset as the letters "theta".
plt.xlabel(r"$\theta_0$", fontsize=20)
plt.ylabel(r"$\theta_1$", fontsize=20, rotation=0)
# Fixed axis limits for the comparison view.
plt.axis([2.5, 4.5, 2.3, 3.9])
plt.show()