Reference: 深度前馈网络与Xavier初始化原理 - 知乎 (zhihu.com)
xavier_normal_ initialization, under the assumption that the input x has zero mean, keeps the variance of the output y approximately equal to the variance of the input x. Let's test this with code.
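Why this holds (a standard one-step derivation, assuming independent zero-mean weights and inputs; n_in and n_out are the layer's fan_in = 300 and fan_out = 400 in the code below):

$$
\operatorname{Var}(y_i) \;=\; n_\text{in}\,\operatorname{Var}(w)\,\operatorname{Var}(x)
\;=\; n_\text{in}\cdot\frac{2}{n_\text{in}+n_\text{out}}\cdot\operatorname{Var}(x)
\;=\; \frac{2\,n_\text{in}}{n_\text{in}+n_\text{out}}\,\operatorname{Var}(x)
$$

since xavier_normal_ (with gain 1) draws weights with Var(w) = 2/(n_in + n_out). The output variance equals the input variance exactly only when n_in = n_out; with 300 → 400 the factor is 600/700 ≈ 0.86, so here it is preserved only approximately.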
import torch

def get_linear(xavier=False):
    linear = torch.nn.Linear(300, 400)
    if xavier:
        # Overwrite the default weights with Xavier (Glorot) normal init
        torch.nn.init.xavier_normal_(linear.weight)
    # Otherwise keep PyTorch's default for nn.Linear, which is
    # kaiming_uniform_(weight, a=math.sqrt(5))
    return linear

def get_x():
    # Zero-mean input with variance 9 ** 2 = 81
    x = 9 * torch.randn(300, 300)
    return x

if __name__ == '__main__':
    x = get_x()
    linear = get_linear(True)
    linear2 = get_linear()
    relu = torch.nn.ReLU()
    print(f"Raw input x: mean {x.mean()}, variance {x.var()}")
    # print(f"Linear weight: mean {linear.weight.mean()}, variance {linear.weight.var()}")
    x_out = linear(x)
    x_out2 = linear2(x)
    print(f"After Xavier-initialized linear layer: mean {x_out.mean()}, variance {x_out.var()}")
    print(f"After Kaiming-initialized (default) linear layer: mean {x_out2.mean()}, variance {x_out2.var()}")
    x_out = relu(x_out)
    x_out2 = relu(x_out2)
    print(f"After Xavier + ReLU: mean {x_out.mean()}, variance {x_out.var()}")
    print(f"After Kaiming + ReLU: mean {x_out2.mean()}, variance {x_out2.var()}")