Python torch.autograd module, grad() example source code - 编程字典

import torch
from torch.autograd import grad
# categorical_kl_div is a project-level helper (batched categorical KL divergence)

def _1st_order_trpo(self, detached_policy_loss_vb, detached_policy_vb, detached_avg_policy_vb, detached_splitted_policy_vb=None):
    on_policy = detached_splitted_policy_vb is None
    # KL divergence: k = \nabla_{\phi_\theta} D_{KL}[ \pi(\cdot|\phi_{\theta_a}) || \pi(\cdot|\phi_\theta) ]
    # kl_div_vb = F.kl_div(detached_policy_vb.log(), detached_avg_policy_vb, size_average=False) # NOTE: the built-in version does not work on batches
    kl_div_vb = categorical_kl_div(detached_policy_vb, detached_avg_policy_vb)
    # NOTE: k & g are both taken w.r.t. the network output, which is detached_policy_vb
    # NOTE: gradients from this part will not flow back into the model,
    # NOTE: which is why only detached policy variables are used here
    if on_policy:
        k_vb = grad(outputs=kl_div_vb, inputs=detached_policy_vb, retain_graph=False, only_inputs=True)[0]
        g_vb = grad(outputs=detached_policy_loss_vb, inputs=detached_policy_vb, retain_graph=False, only_inputs=True)[0]
    else:
        # NOTE: this is why we cannot simply detach and then split policy_vb, but must split before detaching:
        # NOTE: if we detached first, the splits could not backtrace the grads computed in this later part
        # NOTE: of the graph; they would have no way to connect to the graph inside the model
        k_vb = grad(outputs=kl_div_vb.split(1, 0), inputs=detached_splitted_policy_vb, retain_graph=False, only_inputs=True)
        g_vb = grad(outputs=detached_policy_loss_vb.split(1, 0), inputs=detached_splitted_policy_vb, retain_graph=False, only_inputs=True)
        k_vb = torch.cat(k_vb, 0)
        g_vb = torch.cat(g_vb, 0)
    kg_dot_vb = (k_vb * g_vb).sum(1, keepdim=True)
    kk_dot_vb = (k_vb * k_vb).sum(1, keepdim=True)
    # first-order trust-region projection: z* = g - max(0, (k^T g - delta) / ||k||^2) * k
    z_star_vb = g_vb - ((kg_dot_vb - self.master.clip_1st_order_trpo) / kk_dot_vb).clamp(min=0) * k_vb
    return z_star_vb
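The closing lines implement the first-order trust-region step from the ACER paper (Wang et al., 2017): z* = g - max(0, (kᵀg - δ) / ‖k‖²) · k, with δ given by self.master.clip_1st_order_trpo. It rescales the loss gradient g just enough that its component along the KL gradient k stays inside the trust region, so kᵀz* = δ whenever the constraint is active. A toy check of that property, using made-up values rather than anything from the project:

import torch

# k: gradient of the KL term, g: gradient of the policy loss (toy values)
k = torch.tensor([[1.0, 2.0]])
g = torch.tensor([[3.0, 1.0]])
delta = 1.0  # stands in for self.master.clip_1st_order_trpo

kg = (k * g).sum(1, keepdim=True)  # k^T g = 5
kk = (k * k).sum(1, keepdim=True)  # ||k||^2 = 5
z_star = g - ((kg - delta) / kk).clamp(min=0) * k
print((k * z_star).sum(1))  # tensor([1.]) -> projected exactly onto the constraint boundary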

pt_x_bc_var = Variable(torch.from_numpy(x_bc_var).float(), requires_grad=False)
pt_x_in_pos_one = Variable(torch.from_numpy(x_in_pos_one).float(), requires_grad=False)
pt_x_in_zeros = Variable(torch.from_numpy(x_in_zeros).float(), requires_grad=False)
pt_t_in_var = Variable(torch.from_numpy(t_in_var).float(), requires_grad=False)
pt_u_in_zeros = Variable(torch.from_numpy(u_in_zeros).float(), requires_grad=False)
# boundary-condition losses
net_bc_right = net(torch.cat([pt_x_in_zeros, pt_t_in_var], 1))  # output for u(0,t)
mse_u_2 = mse_cost_function(net_bc_right, pt_u_in_zeros)  # e = 0 - u(0,t), equation (2)
net_bc_left = net(torch.cat([pt_x_in_pos_one, pt_t_in_var], 1))  # output for u(1,t)
mse_u_3 = mse_cost_function(net_bc_left, pt_u_in_zeros)
x_0 = torch.cat([pt_x_in_zeros, pt_t_in_var], 1)
x_1 = torch.cat([pt_x_in_pos_one, pt_t_in_var], 1)
pt_x_0 = x_0.detach().requires_grad_(True)
pt_x_1 = x_1.detach().requires_grad_(True)
net_bc_right.requires_grad_(True)
net_bc_left.requires_grad_(True)
u_x_0 = torch.autograd.grad(net_bc_right, pt_x_0, grad_outputs=torch.ones_like(net_bc_right), create_graph=True, allow_unused=True)[0][:, 0].unsqueeze(-1)
u_x_1 = torch.autograd.grad(net_bc_left, pt_x_1, grad_outputs=torch.ones_like(net_bc_left), create_graph=True, allow_unused=True)[0][:, 0].unsqueeze(-1)
u_xx_0 = torch.autograd.grad(u_x_0, pt_x_0, grad_outputs=torch.ones_like(u_x_0), create_graph=True, allow_unused=True)[0][:, 0].unsqueeze(-1)
u_xx_1 = torch.autograd.grad(u_x_1, pt_x_1, grad_outputs=torch.ones_like(u_x_1), create_graph=True, allow_unused=True)[0][:, 0].unsqueeze(-1)

What is wrong with this code, and how can it be fixed?
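The most likely failure: net_bc_right and net_bc_left were produced by a forward pass on tensors built with requires_grad=False, while pt_x_0 and pt_x_1 are fresh detached leaves that never entered that forward pass. torch.autograd.grad therefore has no path from the outputs to those inputs; with allow_unused=True it returns None, and the subsequent [0][:, 0] indexing raises a TypeError. Calling requires_grad_(True) on the outputs after the fact does not retroactively build a graph. A minimal sketch of one plausible repair, assuming net takes the concatenated (x, t) batch:

# Build differentiable leaf inputs first, then run the network on them,
# so the outputs are actually connected to pt_x_0 / pt_x_1 in the graph.
pt_x_0 = torch.cat([pt_x_in_zeros, pt_t_in_var], 1).detach().requires_grad_(True)
pt_x_1 = torch.cat([pt_x_in_pos_one, pt_t_in_var], 1).detach().requires_grad_(True)
net_bc_right = net(pt_x_0)  # u(0, t)
net_bc_left = net(pt_x_1)   # u(1, t)

# allow_unused=True is dropped: the inputs are now used, and an unexpected
# disconnection should raise an error instead of silently yielding None.
u_x_0 = torch.autograd.grad(net_bc_right, pt_x_0, grad_outputs=torch.ones_like(net_bc_right), create_graph=True)[0][:, 0:1]  # du/dx at x = 0
u_x_1 = torch.autograd.grad(net_bc_left, pt_x_1, grad_outputs=torch.ones_like(net_bc_left), create_graph=True)[0][:, 0:1]   # du/dx at x = 1
u_xx_0 = torch.autograd.grad(u_x_0, pt_x_0, grad_outputs=torch.ones_like(u_x_0), create_graph=True)[0][:, 0:1]  # d2u/dx2 at x = 0
u_xx_1 = torch.autograd.grad(u_x_1, pt_x_1, grad_outputs=torch.ones_like(u_x_1), create_graph=True)[0][:, 0:1]  # d2u/dx2 at x = 1

Here [:, 0:1] keeps the x-column with its trailing dimension, equivalent to [:, 0].unsqueeze(-1) in the original.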