GELU
这个函数特别占内存，计算量也很大；它对检测任务有帮助，收敛比 ReLU6 快，
但最终的最高精度不如 ReLU6。
梯度最大值出现在第一层卷积层。
类:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import torch
import math
from torch import nn
from torch.nn import functional as F
class mish(nn.Module):
    """Mish activation: x * tanh(softplus(x)).

    A smooth, non-monotonic activation function.
    See https://arxiv.org/abs/1908.08681 (the original comment cited the
    GELU paper, 1606.08415, which describes a different activation).
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # softplus(x) = log(1 + exp(x)); tanh of it keeps the output
        # bounded below (~ -0.31) while staying unbounded above.
        return x * torch.tanh(F.softplus(x))
class Gelu(nn.Module):
    """Exact GELU activation: x * Phi(x), where Phi is the standard
    normal CDF, evaluated via the error function.

    See https://arxiv.org/abs/1606.08415
    """

    def __init__(self):
        super(Gelu, self).__init__()

    def forward(self, x):
        # Phi(x) = 0.5 * (1 + erf(x / sqrt(2)))
        cdf = 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))
        return x * cdf
class Gelu_new(nn.Module):
def __init__(self):