import math
import numpy as np
import matplotlib.pyplot as plt
# Length-to-width ratio of the ellipse: the objective optimized below is
# x1^2 + x2^2 * RATIO^2, and the initial x2 sample is divided by RATIO.
RATIO = 3
# Coordinate-axis range of the figure; the initial point is sampled
# uniformly from [LIMIT / 2, LIMIT) before the RATIO scaling of x2.
LIMIT = 1.2
class PlotComparaison(object):
"""Compare several optimizers on the function x1^2 + x2^2 * RATIO^2.

Each parameter update is (d1, d2) and the gradient is (dx1, dx2).
At iteration t+1:

Standard GD:
    d1_{t+1} = - eta * dx1
    d2_{t+1} = - eta * dx2

With Momentum:
    d1_{t+1} = eta * (mu * d1_t - dx1_{t+1})
    d2_{t+1} = eta * (mu * d2_t - dx2_{t+1})

With Nesterov Momentum:
    d1_{t+1} = eta * (mu * d1_t - dx1^{nag}_{t+1})
    d2_{t+1} = eta * (mu * d2_t - dx2^{nag}_{t+1})
    where (dx1^{nag}, dx2^{nag}) is the gradient evaluated at
    (x1 + eta * mu * d1_t, x2 + eta * mu * d2_t)

RMSProp:
    w1_{t+1} = beta2 * w1_t + (1 - beta2) * dx1_t^2
    w2_{t+1} = beta2 * w2_t + (1 - beta2) * dx2_t^2
    d1_{t+1} = - eta * dx1_t / (sqrt(w1_{t+1}) + epsilon)
    d2_{t+1} = - eta * dx2_t / (sqrt(w2_{t+1}) + epsilon)

Adam (each parameter update is (d1, d2)):
    v1_{t+1} = beta1 * v1_t + (1 - beta1) * dx1_t
    v2_{t+1} = beta1 * v2_t + (1 - beta1) * dx2_t
    w1_{t+1} = beta2 * w1_t + (1 - beta2) * dx1_t^2
    w2_{t+1} = beta2 * w2_t + (1 - beta2) * dx2_t^2
    v1_corrected = v1_{t+1} / (1 - beta1^{t+1})
    v2_corrected = v2_{t+1} / (1 - beta1^{t+1})
    w1_corrected = w1_{t+1} / (1 - beta2^{t+1})
    w2_corrected = w2_{t+1} / (1 - beta2^{t+1})
    d1_{t+1} = - eta * v1_corrected / (sqrt(w1_corrected) + epsilon)
    d2_{t+1} = - eta * v2_corrected / (sqrt(w2_corrected) + epsilon)
"""
def __init__(self, eta=0.1, mu=0.9, beta1=0.9, beta2=0.99, epsilon=1e-10, angles=None, contour_values=None,
stop_condition=1e-4):
# 全部算法的学习率
self.eta = eta
# 启发式学习的终止条件
self.stop_condition = stop_condition
# Momentum coefficient (used by both Momentum and Nesterov Momentum)
self.mu = mu
# RMSProp / Adam hyperparameters (beta1 is used by Adam only)
self.beta1 = beta1
self.beta2 = beta2
self.epsilon = epsilon
# Randomly generate the initial point from a uniform distribution
self.x1_init, self.x2_init = np.random.uniform(LIMIT / 2, LIMIT), np.random.uniform(LIMIT / 2, LIMIT) / RATIO
self.x1, self.x2 = self.x1_init, self.x2_init
# 等高线相关
if angles == None:
angles = np.arange(0, 2 * math.pi, 0.01)
self.angles = angles
if contour_values == None:
contour_values = [0.25 * i for i in range(1, 5)]
self.contour_values = contour_values
setattr(self, "contour_col