Automatic neural network optimization in Python_[Implementing a convolutional neural network in Python] Implementing optimizers (SGD, Nesterov, Adagrad, Adadelta, RMSprop, Adam)...

import numpy as np
from mlfromscratch.utils import make_diagonal, normalize

# Optimizers for models that use gradient based methods for finding the
# weights that minimize the loss.
# A great resource for understanding these methods:
# http://sebastianruder.com/optimizing-gradient-descent/index.html

class StochasticGradientDescent():
    def __init__(self, learning_rate=0.01, momentum=0):
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.w_updt = None

    def update(self, w, grad_wrt_w):
        # If not initialized
        if self.w_updt is None:
            self.w_updt = np.zeros(np.shape(w))
        # Use momentum if set
        self.w_updt = self.momentum * self.w_updt + (1 - self.momentum) * grad_wrt_w
        # Move against the gradient to minimize loss
        return w - self.learning_rate * self.w_updt
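All of the optimizers in this post except NesterovAcceleratedGradient share the same calling convention: update(w, grad_wrt_w) takes the current weights and the gradient of the loss with respect to them, and returns the new weights. Here is a minimal usage sketch (not part of the original code, assuming only numpy and the StochasticGradientDescent class above) that minimizes the toy quadratic loss 0.5 * ||w||^2, whose gradient is simply w:

import numpy as np

w = np.array([5.0, -3.0])
sgd = StochasticGradientDescent(learning_rate=0.1, momentum=0.9)
for _ in range(200):
    grad_wrt_w = w                   # gradient of 0.5 * ||w||^2 at the current w
    w = sgd.update(w, grad_wrt_w)
print(w)  # approaches [0, 0]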

class NesterovAcceleratedGradient():
    def __init__(self, learning_rate=0.001, momentum=0.4):
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.w_updt = np.array([])

    def update(self, w, grad_func):
        # Initialize on first update (must happen before the look-ahead step,
        # otherwise the empty array cannot be broadcast against w)
        if not self.w_updt.any():
            self.w_updt = np.zeros(np.shape(w))
        # Calculate the gradient of the loss a bit further down the slope from w
        approx_future_grad = np.clip(grad_func(w - self.momentum * self.w_updt), -1, 1)

        self.w_updt = self.momentum * self.w_updt + self.learning_rate * approx_future_grad
        # Move against the gradient to minimize loss
        return w - self.w_updt
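Note that update here takes grad_func, a callable that returns the gradient at an arbitrary point, instead of a precomputed gradient array; this lets the optimizer evaluate the gradient at the look-ahead position w - momentum * w_updt. A short sketch of that calling convention on the same toy quadratic loss as above (again an illustration, not from the original post):

import numpy as np

def grad_func(w):
    return w  # gradient of the toy loss 0.5 * ||w||^2

w = np.array([5.0, -3.0])
nag = NesterovAcceleratedGradient(learning_rate=0.05, momentum=0.4)
for _ in range(200):
    w = nag.update(w, grad_func)     # pass the gradient function, not its value
print(w)  # approaches [0, 0]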

class Adagrad():
    def __init__(self, learning_rate=0.01):
        self.learning_rate = learning_rate
        self.G = None  # Sum of squares of the gradients
        self.eps = 1e-8

    def update(self, w, grad_wrt_w):
        # If not initialized
        if self.G is None:
            self.G = np.zeros(np.shape(w))
        # Add the square of the gradient of the loss function at w
        self.G += np.power(grad_wrt_w, 2)
        # Adaptive gradient with higher learning rate for sparse data
        return w - self.learning_rate * grad_wrt_w / np.sqrt(self.G + self.eps)
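Because G accumulates the squared gradients per parameter, frequently updated parameters have their effective learning rate scaled down over time, while rarely updated (sparse) parameters keep taking comparatively larger individual steps. A small illustration, assuming the Adagrad class above (the parameter pattern and values are made up for the example):

import numpy as np

adagrad = Adagrad(learning_rate=0.1)
w = np.array([1.0, 1.0])
for i in range(10):
    # the first parameter gets a gradient every step, the second only every 5th step
    grad_wrt_w = np.array([1.0, 1.0 if i % 5 == 0 else 0.0])
    w = adagrad.update(w, grad_wrt_w)
print(adagrad.G)  # [10.  2.] : far more accumulated gradient for the first parameter
# When the second parameter does receive a gradient, its step
# 0.1 / sqrt(G[1]) is larger than the first parameter's step at that same iteration.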

class Adadelta():
    def __init__(self, rho=0.95, eps=1e-6):
        self.E_w_updt = None  # Running average of squared parameter updates
        self.E_grad = None    # Running average of the squared gradient of w
        self.w_updt = None    # Parameter update
        self.eps = eps
        self.rho = rho

    def update(self, w, grad_wrt_w):
        # If not initialized
        if self.w_updt is None:
            self.w_updt = np.zeros(np.shape(w))
            self.E_w_updt = np.zeros(np.shape(w))
            self.E_grad = np.zeros(np.shape(grad_wrt_w))

        # Update average of gradients at w
        self.E_grad = self.rho * self.E_grad + (1 - self.rho) * np.power(grad_wrt_w, 2)

        RMS_delta_w = np.sqrt(self.E_w_updt + self.eps)
        RMS_grad = np.sqrt(self.E_grad + self.eps)

        # Adaptive learning rate
        adaptive_lr = RMS_delta_w / RMS_grad
        # Calculate the update
        self.w_updt = adaptive_lr * grad_wrt_w
        # Update the running average of w updates
        self.E_w_updt = self.rho * self.E_w_updt + (1 - self.rho) * np.power(self.w_updt, 2)

        return w - self.w_updt

class RMSprop():
    def __init__(self, learning_rate=0.01, rho=0.9):
        self.learning_rate = learning_rate
        self.Eg = None  # Running average of the square gradients at w
        self.eps = 1e-8
        self.rho = rho

    def update(self, w, grad_wrt_w):
        # If not initialized
        if self.Eg is None:
            self.Eg = np.zeros(np.shape(grad_wrt_w))

        self.Eg = self.rho * self.Eg + (1 - self.rho) * np.power(grad_wrt_w, 2)

        # Divide the learning rate for a weight by a running average of the
        # magnitudes of recent gradients for that weight
        return w - self.learning_rate * grad_wrt_w / np.sqrt(self.Eg + self.eps)
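The difference from Adagrad is that Eg is an exponentially decaying average rather than a running sum, so old gradients are gradually forgotten and the effective step size does not shrink towards zero over a long run. A quick illustrative comparison with a constant gradient (illustration only, assuming the classes defined above):

import numpy as np

adagrad, rmsprop = Adagrad(learning_rate=0.1), RMSprop(learning_rate=0.1, rho=0.9)
w_ada, w_rms = np.array([0.0]), np.array([0.0])
grad_wrt_w = np.array([1.0])
for i in range(100):
    w_ada_new = adagrad.update(w_ada, grad_wrt_w)
    w_rms_new = rmsprop.update(w_rms, grad_wrt_w)
    if i in (0, 99):
        # step size taken at this iteration
        print(i, abs(w_ada_new - w_ada)[0], abs(w_rms_new - w_rms)[0])
    w_ada, w_rms = w_ada_new, w_rms_new
# Adagrad's step shrinks from 0.1 down to about 0.1 / sqrt(100) = 0.01,
# while RMSprop's step settles near the learning rate because Eg converges to 1.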

class Adam():
    def __init__(self, learning_rate=0.001, b1=0.9, b2=0.999):
        self.learning_rate = learning_rate
        self.eps = 1e-8
        self.m = None
        self.v = None
        # Decay rates
        self.b1 = b1
        self.b2 = b2

    def update(self, w, grad_wrt_w):
        # If not initialized
        if self.m is None:
            self.m = np.zeros(np.shape(grad_wrt_w))
            self.v = np.zeros(np.shape(grad_wrt_w))

        self.m = self.b1 * self.m + (1 - self.b1) * grad_wrt_w
        self.v = self.b2 * self.v + (1 - self.b2) * np.power(grad_wrt_w, 2)

        # Bias correction (note: this implementation uses fixed factors 1 - b1 and
        # 1 - b2 rather than the per-step 1 - b1**t and 1 - b2**t of the Adam paper)
        m_hat = self.m / (1 - self.b1)
        v_hat = self.v / (1 - self.b2)

        self.w_updt = self.learning_rate * m_hat / (np.sqrt(v_hat) + self.eps)

        return w - self.w_updt
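As a final sanity check, here is a small comparison sketch (not part of the original post) that runs every optimizer for the same number of steps on the toy quadratic loss used earlier. NesterovAcceleratedGradient is handled separately since its update expects the gradient function itself; all learning rates below are arbitrary example values:

import numpy as np

def loss(w):
    return 0.5 * np.sum(w ** 2)

def grad(w):
    return w

w0 = np.array([5.0, -3.0])
optimizers = {
    "SGD": StochasticGradientDescent(learning_rate=0.1, momentum=0.9),
    "Adagrad": Adagrad(learning_rate=0.5),
    "Adadelta": Adadelta(rho=0.95, eps=1e-6),
    "RMSprop": RMSprop(learning_rate=0.1),
    "Adam": Adam(learning_rate=0.1),
}
for name, opt in optimizers.items():
    w = w0.copy()
    for _ in range(200):
        w = opt.update(w, grad(w))
    print(name, loss(w))

nag = NesterovAcceleratedGradient(learning_rate=0.05, momentum=0.4)
w = w0.copy()
for _ in range(200):
    w = nag.update(w, grad)          # Nesterov takes the gradient function
print("Nesterov", loss(w))
# Every optimizer should reduce the loss from its starting value of 17.0, though at
# very different rates (Adadelta in particular starts slowly, since it has no
# learning rate and its running averages start at zero).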
