Pytorch：Defining new autograd functions_pytorch: defining new autograd functions-CSDN博客

本文链接：https://blog.csdn.net/z2536083458/article/details/88958690

参考资料：https://pytorch.org/tutorials/beginner/pytorch_with_examples.html
前向函数：根据输入的张量计算输出的张量值；
反向函数：接收输出张量关于某个标量（通常是损失值）的梯度，并据此计算输入张量关于同一标量的梯度。

在pytorch中，可以很容易地通过定义torch.autograd.Function的子类来自定义支持自动求梯度的算子：只需在子类中实现前向（forward）和反向（backward）两个静态方法，然后通过该子类的apply方法，像调用普通函数一样使用这个算子。

import torch

class MyRelU(torch.autograd.Function):
    """Custom autograd function implementing ReLU.

    The forward pass clamps the input at zero; the backward pass lets the
    incoming gradient through everywhere except where the input was
    strictly negative.
    """

    @staticmethod
    def forward(ctx, input):
        """Return ``input`` with every negative entry replaced by zero.

        ``ctx`` is the context object for this call; the input tensor is
        saved on it so that ``backward`` can read it back later.
        """
        ctx.save_for_backward(input)
        return torch.clamp(input, min=0)

    @staticmethod
    def backward(ctx, grad_outputs):
        """Return the gradient with respect to the input.

        ``grad_outputs`` is the gradient with respect to the output of
        ``forward``. The result is a new tensor equal to ``grad_outputs``
        except that entries where the saved input was below zero are set
        to zero.
        """
        (saved_input,) = ctx.saved_tensors
        was_negative = saved_input < 0
        # masked_fill is out-of-place, so grad_outputs itself is untouched.
        return grad_outputs.masked_fill(was_negative, 0)

# All tensors below are created as torch.float on the CPU.
dtype = torch.float
device = torch.device("cpu")

# x is (N, D_in), y is (N, D_out); w1 is (D_in, H) and w2 is (H, D_out).
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and targets.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Random initial weights; requires_grad=True makes autograd track them.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6

# The custom Function is invoked through its ``apply`` method; the alias
# is the same on every iteration, so it is bound once here.
relu = MyRelU.apply

for t in range(500):
    # Forward pass: matmul -> custom ReLU -> matmul.
    hidden = relu(torch.mm(x, w1))
    y_pred = torch.mm(hidden, w2)

    # Sum of squared errors over all elements.
    loss = torch.sum((y_pred - y) ** 2)
    print(t, loss.item())

    # Backward pass: fills w1.grad and w2.grad.
    loss.backward()

    # The weight update itself must not be tracked by autograd.
    with torch.no_grad():
        for weight in (w1, w2):
            weight.sub_(learning_rate * weight.grad)
            # Reset the gradient by hand so the next backward() call
            # starts from zero instead of accumulating.
            weight.grad.zero_()

Tensorflow:Static Graphs

pytorch的自动求导有点类似于tensorflow:定义计算图，然后自动求微分。最大的区别在于tensorflow的计算图是静态的，而pytorch使用动态的计算图。
在Tensorflow中，我们一旦定义了一个计算图，便会在这个计算图上一次又一次地执行，每次执行时可以给这个计算图输入不同的数据；而在pytorch中，每一次前向传播都会定义一个新的计算图。
静态图的优势在于可以预先优化图，动态图和静态图的一个区别在于控制流。

import tensorflow as tf
import numpy as np

# x is fed as (N, D_in), y as (N, D_out); w1 is (D_in, H), w2 is (H, D_out).
N, D_in, H, D_out = 64, 1000, 100, 10

# Placeholders for the input and target data; concrete values are supplied
# through feed_dict when the graph is run.
x = tf.placeholder(tf.float32, shape=(None, D_in))
y = tf.placeholder(tf.float32, shape=(None, D_out))

# Weights are graph Variables initialised from a normal distribution.
# PyTorch counterpart:
#   w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
#   w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)
w1 = tf.Variable(tf.random_normal((D_in, H)))
w2 = tf.Variable(tf.random_normal((H, D_out)))

# Forward pass: matmul -> elementwise max with zero -> matmul.
h = tf.matmul(x, w1)
h_relu = tf.maximum(h, tf.zeros(1))
y_pred = tf.matmul(h_relu, w2)

# Sum of squared errors, and its gradients with respect to both weights.
squared_error = (y - y_pred) ** 2.0
loss = tf.reduce_sum(squared_error)
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

learning_rate = 1e-6

# In TensorFlow the weight update is itself part of the computation graph,
# whereas in PyTorch it happens outside the graph:
#   with torch.no_grad():
#       w1 -= learning_rate * w1.grad
#       w2 -= learning_rate * w2.grad
#       w1.grad.zero_()
#       w2.grad.zero_()
new_w1 = w1.assign(w1 - learning_rate * grad_w1)
new_w2 = w2.assign(w2 - learning_rate * grad_w2)

with tf.Session() as sess:
    # Initialise the variables w1 and w2.
    sess.run(tf.global_variables_initializer())

    # Random data fed to the placeholders.
    # PyTorch counterpart:
    #   x = torch.randn(N, D_in, device=device, dtype=dtype)
    #   y = torch.randn(N, D_out, device=device, dtype=dtype)
    x_value = np.random.randn(N, D_in)
    y_value = np.random.randn(N, D_out)

    # The same fetches and feed are used on every step, so build them once.
    fetches = [loss, new_w1, new_w2]
    feed = {x: x_value, y: y_value}

    for _ in range(500):
        # Fetching new_w1 / new_w2 is what executes the weight update;
        # only the loss value is kept.
        # PyTorch counterpart:
        #   loss = (y_pred - y).pow(2).sum()
        #   print(t, loss.item())
        loss_value, _, _ = sess.run(fetches, feed_dict=feed)
        print(loss_value)