Abstract
This article implements the softmax function and its backward pass in pure Python, and compares the results against PyTorch.
Related
For the underlying theory and a detailed derivation, see the article:
softmax函数详解及反向传播中的梯度求导
Series index:
https://blog.csdn.net/oBrightLamp/article/details/85067981
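
As a quick recap of the formulas the code below implements (a restatement for convenience, not material taken from the linked article): for an input vector x,

    softmax(x)_i = exp(x_i) / sum_k exp(x_k)

and its Jacobian is

    d softmax_i / d x_j = softmax_i * (delta_ij - softmax_j)

so given an upstream gradient dl, the input gradient is dx_i = sum_j (d softmax_j / d x_i) * dl_j.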
Main text

import numpy as np
import torch


class Softmax:
    def __init__(self):
        self.softmax = None
        self.grad = None
        self.dnx = None

    def __call__(self, nx):
        # Shift by the max for numerical stability before exponentiating.
        shifted_x = nx - np.max(nx)
        ex = np.exp(shifted_x)
        sum_ex = np.sum(ex)
        self.softmax = ex / sum_ex
        return self.softmax

    def get_grad(self):
        # Jacobian: d softmax_i / d x_j = softmax_i * (delta_ij - softmax_j).
        self.grad = self.softmax[:, np.newaxis] * self.softmax[np.newaxis, :]
        for i in range(len(self.grad)):
            self.grad[i, i] -= self.softmax[i]
        self.grad = -self.grad
        return self.grad

    def backward(self, dl):
        # Vector-Jacobian product: dnx_i = sum_j grad_ij * dl_j.
        self.get_grad()
        self.dnx = np.sum(self.grad * dl, axis=1)
        return self.dnx
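
As an aside (not from the original article): the loop-built Jacobian above is easy to follow, but the same vector-Jacobian product can be computed without materializing the n-by-n matrix. A minimal sketch, assuming numpy is imported as np as above:

def softmax_backward_fast(s, dl):
    # Uses the identity sum_j s_i*(delta_ij - s_j)*dl_j = s_i*(dl_i - dot(s, dl)),
    # so the full Jacobian never needs to be built.
    return s * (dl - np.dot(s, dl))

For a length-n input this needs O(n) memory instead of O(n^2), which matters for large vocabularies.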
np.random.seed(123)
np.set_printoptions(precision=8, suppress=True, linewidth=120)

d_loss = np.array([11, 12, 13, 14, 15, 16, 17, 18, 19], dtype=float)
d_loss_tensor = torch.tensor(d_loss, requires_grad=True)

# Pure numpy forward and backward.
softmax_numpy = Softmax()
x_numpy = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=float)
soft_numpy = softmax_numpy(x_numpy)
x_grad_numpy = softmax_numpy.backward(d_loss)

# PyTorch reference for comparison.
x_tensor = torch.tensor(x_numpy, requires_grad=True)
soft_tensor = torch.nn.functional.softmax(x_tensor, dim=0)
soft_tensor.backward(d_loss_tensor)
x_grad_tensor = x_tensor.grad
print(soft_numpy)
print(soft_tensor.detach().numpy())
print()
print(x_grad_numpy)
print(x_grad_tensor.numpy())
"""
代码输出 :
[ 0.00021208 0.00057649 0.00156706 0.00425972 0.01157912 0.03147531 0.08555877 0.23257286 0.63219858]
[ 0.00021208 0.00057649 0.00156706 0.00425972 0.01157912 0.03147531 0.08555877 0.23257286 0.63219858]
[-0.00157344 -0.00370057 -0.00849213 -0.01882428 -0.03959057 -0.07614301 -0.12141937 -0.09747922 0.36722258]
[-0.00157344 -0.00370057 -0.00849213 -0.01882428 -0.03959057 -0.07614301 -0.12141937 -0.09747922 0.36722258]
"""