import torch as t
from torch import nn
n, d, m = 3, 5, 7
embedding = nn.Embedding(n, d, max_norm=1.0)  # max_norm expects a float; `True` would be silently coerced to 1.0
W = t.randn((m, d), requires_grad=True)
idx = t.tensor([1, 2])
# weight must be cloned here: the forward call on the next line renormalizes
# the looked-up rows of embedding.weight in place, and autograd refuses to
# backprop through a tensor that was modified in place after being used
a = embedding.weight.clone() @ W.t()
b = embedding(idx) @ W.t()  # with max_norm set, this forward pass modifies embedding.weight in place
out = a.unsqueeze(0) + b.unsqueeze(1)  # broadcasts (1, n, m) + (len(idx), 1, m) -> (len(idx), n, m)
print(a.unsqueeze(0).shape, b.unsqueeze(1).shape)
print(a.unsqueeze(0), "\n", b.unsqueeze(1), "\n", out.shape)
print(out)
loss = out.sigmoid().prod()  # reduce to a scalar so backward() needs no gradient argument
loss.backward()
Output:
torch.Size([1, 3, 7]) torch.Size([2, 1, 7])
tensor([[[ 1.0560, -0.3363, 1.9043, -0.8517, 0.0666, -0.1867, -0.1422],
[-1.0370, -0.8827, 0.1464, -0.4847, -0.0349, -0.
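To see why the clone matters, here is a minimal sketch of my own (not part of the original snippet; it assumes a reasonably recent PyTorch and uses a seed only for reproducibility). It prints the row norms of embedding.weight before and after a lookup, showing that a forward pass with max_norm rescales the selected rows in place, and then shows how using the weight without clone() breaks the backward pass:

import torch as t
from torch import nn

t.manual_seed(0)
emb = nn.Embedding(3, 5, max_norm=1.0)
idx = t.tensor([1, 2])

print(emb.weight.norm(dim=1))   # row norms before the lookup (typically > 1.0)
emb(idx)                        # the forward pass renormalizes rows 1 and 2 in place
print(emb.weight.norm(dim=1))   # those rows are now clamped to norm <= 1.0

# Without clone(), the in-place renorm invalidates a tensor autograd saved:
W = t.randn(7, 5, requires_grad=True)
a = emb.weight @ W.t()          # matmul saves emb.weight for the backward pass
emb(idx)                        # in-place modification of emb.weight
# a.sum().backward()            # raises RuntimeError: a variable needed for
#                               # gradient computation was modified by an
#                               # inplace operation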