1. 单层感知机
输入层:输入的值和权重,w*x进行求和,得到X01
激活函数:sigmoid函数,函数值在0~1之间,函数的导数=函数值*(1-函数值)
Loss函数:E=1/2*Σ(O-t)**2,其中O为激活函数的输出,t为目标值
梯度的推导:∂E/∂wj=(O-t)*O*(1-O)*xj
# Example 1: single-layer perceptron.
# Forward pass (sigmoid(x @ w^T)), MSE loss against a target of ones,
# then backprop to obtain dLoss/dw in w.grad.
print("示例一:")
x = torch.randn(1, 10)            # input row vector, shape (1, 10)
print(x)
w = torch.randn(1, 10)            # weight row vector, shape (1, 10)
w.requires_grad_()                # track gradients for w
print(w)
# Sigmoid activation: (1, 10) @ (10, 1) -> (1, 1), values in (0, 1)
o = torch.sigmoid(x @ w.t())
print(o.shape)                    # torch.Size([1, 1])
print(o)
# MSE loss — F.mse_loss(input, target): the prediction comes first.
loss = F.mse_loss(o, torch.ones(1, 1))
print(loss.shape)                 # mean reduction yields a scalar: torch.Size([])
loss.backward()                   # populates w.grad, same shape as w
print(w.grad)
2. 单层多个感知机
# Example 2: a single layer with two perceptrons (two output units).
# Same pipeline as example 1, but w has one row per output unit.
print("单层多个感知机示例二:")
x = torch.randn(1, 10)            # input, shape (1, 10)
print(x)
w = torch.randn(2, 10)            # weights, one row per output unit: (2, 10)
w.requires_grad_()                # track gradients for w
print(w)
# Sigmoid activation: (1, 10) @ (10, 2) -> (1, 2)
o = torch.sigmoid(x @ w.t())
print(o.shape)
print(o)
# MSE loss — F.mse_loss(input, target): the prediction comes first.
loss = F.mse_loss(o, torch.ones(1, 2))
print(loss)                       # scalar (mean over both outputs)
loss.backward()                   # w.grad has the same shape as w: (2, 10)
print(w.grad)
3. 链式法则
y2=y1*w2+b2. y1=w1*x+b1
# Chain rule demo with scalar tensors:
#   y1 = w1*x + b1   (inner function)
#   y2 = y1*w2 + b2  (outer function)
# Verifies numerically that dy2/dw1 == (dy2/dy1) * (dy1/dw1).
x = torch.tensor(1.)
w1 = torch.tensor(2., requires_grad=True)
b1 = torch.tensor(1.)
w2 = torch.tensor(2., requires_grad=True)
b2 = torch.tensor(1.)

y1 = x * w1 + b1
y2 = y1 * w2 + b2

# Each partial derivative via autograd; retain_graph=True keeps the
# computation graph alive so it can be differentiated more than once.
(dy2_dy1,) = torch.autograd.grad(y2, [y1], retain_graph=True)
(dy1_dw1,) = torch.autograd.grad(y1, [w1], retain_graph=True)
(dy2_dw1,) = torch.autograd.grad(y2, [w1], retain_graph=True)

print(dy2_dy1 * dy1_dw1)  # tensor(2.) — product of the two factors
print(dy2_dw1)            # tensor(2.) — direct derivative; chain rule holds