import torch
import torch.nn as nn
class RMSNorm(nn.Module):
    """Root-mean-square layer normalization with a learnable affine transform.

    Normalizes the last (feature) dimension of the input by its
    root-mean-square, then applies an elementwise scale and bias.
    """

    def __init__(self, d_model, eps=1e-8):
        """
        d_model: size of the feature (last) dimension
        eps: small constant inside the sqrt to avoid division by zero
        """
        super().__init__()
        self.eps = eps
        self.scale = nn.Parameter(torch.ones(d_model))
        self.bias = nn.Parameter(torch.zeros(d_model))

    def forward(self, x):
        # RMS over the feature dimension; eps goes inside the sqrt for
        # numerical stability.
        norm = torch.sqrt((x ** 2).mean(dim=-1, keepdim=True) + self.eps)
        return self.scale * (x / norm) + self.bias
# Smoke test: run RMSNorm over a random (batch, seq, feature) tensor.
batch_size, seq_len, d_model = 2, 5, 10
x = torch.randn(batch_size, seq_len, d_model)
output = RMSNorm(d_model)(x)
print("输出张量:", output)
import torch
import torch.nn as nn
class LayerNorm(nn.Module):
    """Layer normalization over the last (feature) dimension.

    Matches torch.nn.LayerNorm: normalizes with the biased (population)
    variance and adds eps to the variance *before* the square root, then
    applies a learnable elementwise scale and bias.
    """

    def __init__(self, d_model, eps=1e-5):
        """
        d_model: size of the feature (last) dimension
        eps: small constant added to the variance for numerical stability
        """
        super().__init__()
        self.scale = nn.Parameter(torch.ones(d_model))
        self.bias = nn.Parameter(torch.zeros(d_model))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(dim=-1, keepdim=True)
        # Fix: use the biased variance (unbiased=False) and add eps inside
        # the sqrt. The original used x.std() (Bessel-corrected, n-1) and
        # added eps to the std after the sqrt, which diverges from the
        # standard LayerNorm definition (and from torch.nn.LayerNorm).
        var = x.var(dim=-1, keepdim=True, unbiased=False)
        x_norm = (x - mean) / torch.sqrt(var + self.eps)
        return self.scale * x_norm + self.bias
# Smoke test: run LayerNorm over a random (batch, seq, feature) tensor.
batch_size, seq_len, d_model = 2, 5, 10
x = torch.randn(batch_size, seq_len, d_model)
output = LayerNorm(d_model)(x)
print("输出张量:", output)