- Learning rate decay: gradually reduce the learning rate as training progresses. AdaGrad takes this idea further by appropriately adjusting the learning rate for each individual element of the parameters, as captured by the update rule below.
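The standard AdaGrad update rule, which the implementation below follows, accumulates the element-wise square of the gradient in $h$ and scales each step by $1/\sqrt{h}$, so frequently-updated elements get smaller steps over time:

$$
h \leftarrow h + \frac{\partial L}{\partial W} \odot \frac{\partial L}{\partial W},
\qquad
W \leftarrow W - \eta \, \frac{1}{\sqrt{h}} \odot \frac{\partial L}{\partial W}
$$

where $\odot$ denotes element-wise multiplication and $\eta$ is the learning rate.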
```python
# coding: utf-8
import numpy as np


class AdaGrad:
    """AdaGrad: adapts the learning rate per parameter element."""

    def __init__(self, learning_rate=0.01):
        self.learning_rate = learning_rate
        self.h = None  # accumulated sum of squared gradients

    def update(self, params, grads):
        # Lazily initialize h with the same shapes as the parameters
        if self.h is None:
            self.h = {}
            for key, value in params.items():
                self.h[key] = np.zeros_like(value)

        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            # 1e-7 is a small constant that keeps the denominator from being 0
            params[key] -= self.learning_rate * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
```
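A minimal usage sketch of the class above. The toy objective f(x, y) = x²/20 + y², its analytic gradient, and the learning rate of 1.5 are illustrative assumptions, not part of the original:

```python
# Minimize the assumed toy objective f(x, y) = x**2 / 20 + y**2
import numpy as np

optimizer = AdaGrad(learning_rate=1.5)
params = {'x': np.array(-7.0), 'y': np.array(2.0)}

for i in range(30):
    # Analytic gradient of the toy objective: df/dx = x/10, df/dy = 2y
    grads = {'x': params['x'] / 10.0, 'y': 2.0 * params['y']}
    optimizer.update(params, grads)

print(params['x'], params['y'])  # both values approach 0
```

Note that because h only ever grows, the effective step size shrinks monotonically; this is the motivation for variants such as RMSProp, which replace the sum with an exponential moving average.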