It doesn't work. The step size is the learning rate multiplied by the gradient, and with a vanished gradient the gradient that comes back is 0, so increasing the learning rate changes nothing: lr times 0 is still 0.
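To see the mechanism in isolation before the full example below, here is a minimal sketch (not part of the original code): the sigmoid's derivative is sigmoid(x) * (1 - sigmoid(x)), which underflows to exactly 0 in float32 once the input is far into the tails.

import torch

x = torch.tensor([100000.0], requires_grad=True)
y = torch.sigmoid(x)  # saturates to 1.0 in float32
y.backward()
print(y.item(), x.grad.item())  # prints 1.0 and 0.0: the gradient has vanished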
import torch
from torch import nn
import torch.optim as optim

network = nn.Linear(1, 1)
# network1 = nn.BatchNorm1d(1)  # earlier experiment: batch norm in front of the sigmoid
w = nn.Sigmoid()
# Huge raw inputs: these drive the sigmoid deep into its flat tails.
tr = torch.Tensor([[100000], [200000]])
# tr = torch.Tensor([[1], [2]])  # small inputs, for comparison
test = torch.Tensor([[150000], [300000]])
# Even an absurdly large learning rate cannot compensate for a zero gradient.
optimizer = optim.Adam(network.parameters(), lr=4000000000000000)
# optimizer1 = optim.Adam(network1.parameters(), lr=0.04)

while True:
    network.train()
    optimizer.zero_grad()
    l = w(network(tr))
    # l = w(network1(network(tr)))  # batch-norm variant
    # Squared error against the targets 0 and 1.
    l = (l[0] - 0) ** 2 + (l[1] - 1) ** 2
    l.backward()
    optimizer.step()  # the update depends on the gradient, which is 0, so nothing moves
    for name, parms in network.named_parameters():
        print('-->name:', name)
        print('-->para:', parms)
        print('-->grad_requirs:', parms.requires_grad)
        print('-->grad_value:', parms.grad)
    print("===")
The printed gradient is 0.
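One workaround, as a minimal sketch (the division by 1e5 is an arbitrary choice of mine, not from the original): rescale the inputs so the sigmoid is evaluated near its linear region, where the gradient is nonzero and an ordinary learning rate actually updates the weights.

import torch
from torch import nn
import torch.optim as optim

network = nn.Linear(1, 1)
w = nn.Sigmoid()
# Hypothetical rescaling: divide the raw inputs by 1e5 so they land near
# the sigmoid's linear region instead of its saturated tails.
tr = torch.Tensor([[100000], [200000]]) / 1e5
optimizer = optim.Adam(network.parameters(), lr=0.04)

for _ in range(3):
    optimizer.zero_grad()
    out = w(network(tr))
    loss = (out[0] - 0) ** 2 + (out[1] - 1) ** 2
    loss.backward()
    optimizer.step()
    # The weight gradient is now nonzero, so each step actually changes the weight.
    print(network.weight.grad)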