Implementing the SMU Activation Function in PyTorch
The code in this article is taken from the SMU source code on GitHub.
# coding=utf-8
import torch
from torch import nn


class SMU(nn.Module):
    '''
    Implementation of the SMU activation.

    Shape:
        - Input: (N, *) where * means any number of additional dimensions
        - Output: (N, *), same shape as the input

    Parameters:
        - alpha: hyperparameter

    References:
        - See the related paper: https://arxiv.org/abs/2111.04682

    Examples:
        >>> smu = SMU()
        >>> x = torch.Tensor([0.6, -0.3])
        >>> x = smu(x)
    '''
    def __init__(self, alpha=0.25):
        '''
        Initialization.
        INPUT:
            - alpha: hyperparameter, 0.25 by default
        '''
        super(SMU, self).__init__()
        self.alpha = alpha
        # mu is a trainable smoothing parameter; the larger mu is,
        # the sharper the approximation of the maximum function
        self.mu = torch.nn.Parameter(torch.tensor(1000000.0))

    def forward(self, x):
        return ((1 + self.alpha) * x
                + (1 - self.alpha) * x * torch.erf(self.mu * (1 - self.alpha) * x)) / 2
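# Note (added, not in the original source): the forward pass follows the
# paper's smooth-maximum construction, max(a, b) = (a + b + |a - b|)/2,
# with |x| approximated by x*erf(mu*x). Substituting a = x and b = alpha*x
# gives the expression above, so for large mu SMU behaves like Leaky ReLU
# with negative slope alpha. A quick sanity check with the huge default mu:
#   >>> import torch.nn.functional as F
#   >>> x = torch.linspace(-2, 2, 9)
#   >>> torch.allclose(SMU()(x), F.leaky_relu(x, negative_slope=0.25))
#   True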
class SMU1(nn.Module):
    '''
    Implementation of the SMU-1 activation.

    Shape:
        - Input: (N, *) where * means any number of additional dimensions
        - Output: (N, *), same shape as the input

    Parameters:
        - alpha: hyperparameter

    References:
        - See the related paper: https://arxiv.org/abs/2111.04682

    Examples:
        >>> smu1 = SMU1()
        >>> x = torch.Tensor([0.6, -0.3])
        >>> x = smu1(x)
    '''
    def __init__(self, alpha=0.25):
        '''
        Initialization.
        INPUT:
            - alpha: hyperparameter, 0.25 by default
        '''
        super(SMU1, self).__init__()
        self.alpha = alpha
        # mu is a trainable smoothing parameter; the smaller mu is,
        # the sharper the approximation of the maximum function
        self.mu = torch.nn.Parameter(torch.tensor(4.352665993287951e-9))

    def forward(self, x):
        return ((1 + self.alpha) * x
                + torch.sqrt(torch.square(x - self.alpha * x) + torch.square(self.mu))) / 2
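# Note (added, not in the original source): SMU-1 instead smooths |x| as
# sqrt(x**2 + mu**2), so here the approximation sharpens as mu shrinks
# toward zero. With the tiny default mu, the output is again numerically
# indistinguishable from Leaky ReLU:
#   >>> import torch.nn.functional as F
#   >>> x = torch.linspace(-2, 2, 9)
#   >>> torch.allclose(SMU1()(x), F.leaky_relu(x, negative_slope=0.25))
#   True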
def test_SMU(x):
    smu_activation = SMU()
    print(smu_activation(x))


def test_SMU1(x):
    smu1_activation = SMU1()
    print(smu1_activation(x))


def test():
    x = torch.Tensor([0.6, -0.3])
    test_SMU(x)
    test_SMU1(x)


if __name__ == '__main__':
    test()
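Because SMU is an nn.Module and its mu is an nn.Parameter, it can be dropped into a network like any built-in activation, and mu is trained together with the weights. A minimal usage sketch (my addition, not from the original source; the layer sizes are arbitrary and chosen only for illustration):

import torch
from torch import nn

model = nn.Sequential(
    nn.Linear(16, 32),
    SMU(alpha=0.25),   # the class defined above
    nn.Linear(32, 1),
)
out = model(torch.randn(4, 16))
print(out.shape)  # torch.Size([4, 1])
# mu is registered as a parameter, so any optimizer built over
# model.parameters() will update it too:
print([name for name, _ in model.named_parameters() if 'mu' in name])  # ['1.mu']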
Implementing the SMU Activation Function in TensorFlow
# coding=utf-8
import numpy as np  # needed by test() below
import tensorflow as tf


def SMU(x, alpha=0.25):
    # mu is a trainable scalar variable, identified by name
    mu = tf.compat.v1.get_variable('SMU_mu', shape=(),
                                   initializer=tf.constant_initializer(1000000),
                                   dtype=tf.float32)
    return ((1 + alpha) * x + (1 - alpha) * x * tf.math.erf(mu * (1 - alpha) * x)) / 2


def SMU1(x, alpha=0.25):
    # mu is a trainable scalar variable, identified by name
    mu = tf.compat.v1.get_variable('SMU1_mu', shape=(),
                                   initializer=tf.constant_initializer(4.352665993287951e-9),
                                   dtype=tf.float32)
    return ((1 + alpha) * x + tf.math.sqrt(tf.math.square(x - alpha * x) + tf.math.square(mu))) / 2
def test_SMU(x):
    print(SMU(x))


def test_SMU1(x):
    print(SMU1(x))


def test():
    x = tf.convert_to_tensor(np.array([[-0.6], [0.6]]), dtype=tf.float32)
    test_SMU(x)
    test_SMU1(x)


if __name__ == '__main__':
    test()
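The tf.compat.v1.get_variable calls above are a TF1-style pattern. Under TF2 a cleaner route is to wrap SMU as a Keras layer, so that mu becomes a tracked, trainable weight. A sketch of that alternative (my own adaptation, not part of the original source):

import tensorflow as tf

class SMULayer(tf.keras.layers.Layer):
    def __init__(self, alpha=0.25, **kwargs):
        super().__init__(**kwargs)
        self.alpha = alpha

    def build(self, input_shape):
        # mu is created once per layer instance and trained with the model
        self.mu = self.add_weight(name='mu', shape=(),
                                  initializer=tf.constant_initializer(1000000.0),
                                  trainable=True)

    def call(self, x):
        return ((1 + self.alpha) * x
                + (1 - self.alpha) * x * tf.math.erf(self.mu * (1 - self.alpha) * x)) / 2

layer = SMULayer()
print(layer(tf.constant([[-0.6], [0.6]])))  # approximately [[-0.15], [0.6]]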
For a walkthrough of the code and the underlying theory, see the accompanying blog post.