梯度下降法(Gradient Descent)是一种算法,但它不像多元线性回归那样是一个具体执行
回归任务的算法,而是一种非常通用的优化算法,用来帮助许多机器学习算法求解出最优解。
所谓通用,是指很多机器学习算法都使用它,甚至深度学习也用它来求解最优解。所有优化
算法的目的都是期望以最快的速度把模型参数 θ 求解出来,梯度下降法就是一种经典且常用
的优化算法。
一、一元函数梯度下降
函数:
import numpy as np
import matplotlib.pyplot as plt
# Objective function to be minimized.
def f(x):
    """Return f(x) = 3x^4 + 4x^3 + 2 (works elementwise on NumPy arrays too)."""
    quartic_term = 3 * x ** 4
    cubic_term = 4 * x ** 3
    return quartic_term + cubic_term + 2
# Analytic derivative of the objective.
def f_prime(x):
    """Return f'(x) = 12x^3 + 12x^2, the first derivative of f."""
    cubic_part = 12 * x ** 3
    quadratic_part = 12 * x ** 2
    return cubic_part + quadratic_part
# Gradient descent for a one-variable function.
def gradient_descent(starting_point, learning_rate, num_iterations, grad=None):
    """Minimize a 1-D function by iterating x <- x - learning_rate * grad(x).

    Args:
        starting_point: initial value of x.
        learning_rate: step-size multiplier applied to the gradient.
        num_iterations: number of update steps to perform.
        grad: derivative function of the objective; defaults to the
            module-level ``f_prime`` so existing callers are unchanged.

    Returns:
        (x, x_history): the final x and the list of every visited x,
        of length ``num_iterations + 1`` (the starting point is included).
    """
    if grad is None:
        # Resolve lazily so the default still tracks the module-level f_prime.
        grad = f_prime
    x = starting_point
    x_history = [x]
    for _ in range(num_iterations):
        x = x - learning_rate * grad(x)  # step opposite the slope
        x_history.append(x)
    return x, x_history
# Run gradient descent on f starting from x = -2, then visualize the
# objective curve together with every point the optimizer visited.
start_x = -2
lr = 0.01
steps = 100
best_x, trail = gradient_descent(start_x, lr, steps)
print(best_x)

# Dense grid for drawing the curve; convert the visited points for plotting.
grid = np.linspace(-2, 2, 400)
curve = f(grid)
trail = np.array(trail)
trail_y = f(trail)

plt.rcParams['font.family'] = 'SimHei'
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly with a CJK font
plt.plot(grid, curve, label='f(x) = 3x^4 + 4x^3 + 2', color='blue')
plt.scatter(trail, trail_y, color='red', label='Gradient Descent', zorder=5)
plt.plot(trail, trail_y, color='red', linestyle='--', linewidth=1)
plt.xlabel('x')
plt.ylabel('f(x)')
plt.title('梯度下降过程')
plt.legend()
plt.grid(True)
plt.show()
二、二元函数梯度下降
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Two-variable objective and its gradient are defined below.
def f(x, y):
    """Paraboloid objective f(x, y) = x^2 + y^2, minimized at the origin."""
    x_squared = x ** 2
    y_squared = y ** 2
    return x_squared + y_squared
def gradient(x, y):
    """Analytic gradient of f at (x, y): the vector [2x, 2y]."""
    partials = [2 * x, 2 * y]
    return np.array(partials)
# Gradient descent for a two-variable function.
def gradient_descent_multivariable(starting_point, learning_rate, num_iterations, grad_fn=None):
    """Minimize a 2-D function by gradient descent.

    Args:
        starting_point: initial (x, y) as a length-2 sequence.
        learning_rate: step-size multiplier applied to the gradient.
        num_iterations: number of update steps to perform.
        grad_fn: callable ``grad_fn(x, y) -> array([df/dx, df/dy])``;
            defaults to the module-level ``gradient`` so existing
            callers are unchanged.

    Returns:
        (point, path): the final point and an array of shape
        ``(num_iterations + 1, 2)`` holding every visited point.
    """
    if grad_fn is None:
        # Resolve lazily so the default still tracks the module-level gradient.
        grad_fn = gradient
    # Float copy: an integer start like [2, 2] must still take fractional steps,
    # and copying guards against mutating a caller-supplied array.
    point = np.asarray(starting_point, dtype=float)
    path = [point]
    for _ in range(num_iterations):
        step = grad_fn(point[0], point[1])
        point = point - learning_rate * step  # creates a new array each iteration
        path.append(point)
    return point, np.array(path)
# Configure and run the 2-D optimizer, then draw the descent path on the
# surface of f together with contour lines projected below it.
init = [2, 2]
step_size = 0.1
n_steps = 50
optimum, trajectory = gradient_descent_multivariable(init, step_size, n_steps)

# Surface grid over [-2, 2] x [-2, 2].
axis_pts = np.linspace(-2, 2, 400)
X, Y = np.meshgrid(axis_pts, axis_pts)
Z = f(X, Y)

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, Z, rstride=20, cstride=20, alpha=0.3, cmap='jet')
ax.contour(X, Y, Z, zdir='z', offset=-1, levels=np.logspace(-1, 3, 20), cmap='jet')

# Lift the visited (x, y) points onto the surface for plotting.
traj_x = trajectory[:, 0]
traj_y = trajectory[:, 1]
traj_z = f(traj_x, traj_y)
ax.plot(traj_x, traj_y, traj_z, 'r-', label='Gradient Descent Path', linewidth=2)
ax.scatter(traj_x, traj_y, traj_z, color='red')

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('f(X, Y)')
ax.set_title('3D Gradient Descent Optimization Path for f(x, y) = x^2 + y^2')
ax.legend()
plt.show()