通过定义求导
描述
通过定义求取损失函数的导数, 速度慢, 一般用于校验导数公式的结果是否正确.
不受损失函数的形式限制.
实现
import numpy as np
# Fixed seed so the synthetic data set (and hence the fitted theta) is reproducible.
np.random.seed(555)
# 1000 samples x 10 features, uniform in [0, 1).
X = np.random.random(size=(1000, 10))
# Prepend a column of ones so theta[0] acts as the intercept term.
X_b = np.hstack([np.ones([len(X), 1]), X])
# Ground-truth coefficients [1, 2, ..., 11]; gradient descent should recover these.
true_theta = np.arange(1, 12, dtype=float)
# Noise-free targets, so the loss can be driven arbitrarily close to zero.
y = X_b.dot(true_theta)
def J(theta, X_b, y):
    """Mean-squared-error loss of a linear model.

    :param theta: coefficient vector (theta[0] is the intercept)
    :param X_b: feature matrix with a leading column of ones
    :param y: target values
    :return: MSE of the predictions, or +inf if evaluation fails
             (e.g. theta has diverged during gradient descent)
    """
    try:
        return np.sum((y - X_b.dot(theta)) ** 2) / len(X_b)
    # Was a bare `except:`, which also swallows KeyboardInterrupt/SystemExit;
    # Exception is the broadest class that is safe to treat as "loss blew up".
    except Exception:
        return float('inf')
def dJ_debug(theta, X_b, y, epsilon=0.01):
    """Numerically approximate the gradient of J by central differences.

    Slow (two loss evaluations per coefficient) but independent of the
    analytic form of the loss, so it is useful for validating a
    hand-derived gradient formula.

    :param theta: coefficient vector
    :param X_b: feature matrix with a leading column of ones
    :param y: target values
    :param epsilon: half-width of the symmetric perturbation
    :return: approximate gradient, same length as theta
    """
    grad = np.empty(len(theta))
    for i in range(len(theta)):
        plus = theta.copy()
        minus = theta.copy()
        plus[i] += epsilon
        minus[i] -= epsilon
        # Central difference: (J(theta + e_i*eps) - J(theta - e_i*eps)) / (2*eps)
        grad[i] = (J(plus, X_b, y) - J(minus, X_b, y)) / 2 / epsilon
    return grad
def gradient_descent(X, y, initial_theta, eta=0.001, n_iters=10000, epsilon=1e-8):
    """Fit linear-regression coefficients by batch gradient descent.

    Uses the numerical gradient (dJ_debug), so convergence is slow but
    does not depend on an analytic derivative.

    :param X: feature matrix (without the intercept column)
    :param y: target values
    :param initial_theta: starting coefficients, e.g. np.zeros(X.shape[1] + 1)
    :param eta: learning rate
    :param n_iters: maximum number of iterations
    :param epsilon: stop once the loss improves by less than this amount
    :return: theta array; intercept_ = theta[0], coef_ = theta[1:]
    """
    X_b = np.hstack([np.ones([len(X), 1]), X])
    theta = initial_theta
    iteration = 0
    for iteration in range(1, n_iters + 1):
        gradient = dJ_debug(theta, X_b, y)
        previous = theta
        theta = theta - eta * gradient
        # Converged: the loss barely changed between consecutive steps.
        if abs(J(theta, X_b, y) - J(previous, X_b, y)) < epsilon:
            break
    # Report how many iterations were actually used.
    print(iteration)
    return theta
if __name__ == '__main__':
    # Start from all-zero coefficients (10 features + 1 intercept).
    start_theta = np.zeros(X.shape[1] + 1)
    fitted = gradient_descent(X, y, start_theta, 0.02)
    # Should be close to true_theta = [1, 2, ..., 11].
    print(fitted)