# 需要导入模块: from keras import backend [as 别名]
# 或者: from keras.backend import batch_get_value [as 别名]
def get_updates(self, params, constraints, loss):
grads = self.get_gradients(loss, params)
self.updates = [K.update_add(self.iterations, 1)]
t = (self.iterations + 1.)/self.accum_iters
accum_switch = K.cast(K.equal((self.iterations + 1.) % self.accum_iters, 0), dtype=K.floatx())
# Due to the recommendations in [2], i.e. warming momentum schedule
momentum_cache_t = self.beta_1 * (1. - 0.5 * (K.pow(0.96, t * self.schedule_decay)))
momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * (K.pow(0.96, (t + 1) * self.schedule_decay)))
m_schedule_new = self.m_schedule * momentum_cache_t
m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
self.updates.append((self.m_schedule, accum_switch*m_schedule_new + (1. - accum_switch)*self.m_schedule))
shapes = [x.shape for x in K.batch_get_value(params)]
ms = [K.zeros(shape) for shape in shapes]
vs = [K.zeros(shape) for shape in shapes]
gs = [K.zeros(shape) for shape in shapes]
self.weights = [self.iterations] + ms + vs
for p, gp, m, v, ga in zip(params, grads, ms, vs, gs):
g = (ga + gp)/self.accum_iters
# the following equations given in [1]
g_prime = g / (1. - m_schedule_new)
m_t = self.beta_1 * m + (1. - self.beta_1) * g
m_t_prime = m_t / (1. - m_schedule_next)
v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
m_t_bar = (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime
self.updates.append(K.update(m, (1. - accum_switch)*m + accum_switch*m_t))
self.updates.append(K.update(v, (1. - accum_switch)*v + accum_switch*v_t))
self.updates.append(K.update(ga, (1. - accum_switch)*(ga + gp)))
p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
new_p = p_t
# apply constraints
if p in constraints:
c = constraints[p]
new_p = c(new_p)
self.updates.append(K.update(p, (1-accum_switch)*p + accum_switch*new_p))
return self.updates