# Multilayer Perceptron

The point of ReLU is to act like the adjustable coefficients in front of x² or higher-order terms: each hidden unit contributes a bent, piecewise-linear piece, and combining those pieces pulls the model closer to the curve you actually want.
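A tiny sketch of that intuition (my own illustration, not from the original notes): a weighted sum of shifted ReLUs is piecewise linear, and with hand-picked weights it tracks y = x² quite closely on [-2, 2].

```python
import torch

def relu(x):
    return torch.max(x, torch.zeros_like(x))

x = torch.linspace(-2, 2, 9)
# Piecewise-linear fit to y = x**2: start with slope -3 at x = -2,
# then add slope changes of +2 at x = -1, 0, and 1
approx = 4 - 3 * (x + 2) + 2 * relu(x + 1) + 2 * relu(x) + 2 * relu(x - 1)
print(torch.stack([x**2, approx]))  # the two rows track each other closely
```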
## Implementing the MLP from Scratch
```python
import torch
from torch import nn
from d2l import torch as d2l

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
```
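Each minibatch from the loader is 256 grayscale 28×28 images plus their labels; a quick peek confirms the shapes (sanity check added by me, assuming d2l's default Fashion-MNIST loader):

```python
X, y = next(iter(train_iter))
print(X.shape, y.shape)  # torch.Size([256, 1, 28, 28]) torch.Size([256])
```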
```python
# A single-hidden-layer MLP: 784 inputs (28*28 pixels), 256 hidden units, 10 classes
num_inputs, num_hiddens, num_outputs = 784, 256, 10

# Multiply by 0.01 so the random initial weights start small and training stays stable
W1 = nn.Parameter(torch.randn(num_inputs, num_hiddens, requires_grad=True) * 0.01)
b1 = nn.Parameter(torch.zeros(num_hiddens, requires_grad=True))
W2 = nn.Parameter(torch.randn(num_hiddens, num_outputs, requires_grad=True) * 0.01)
b2 = nn.Parameter(torch.zeros(num_outputs, requires_grad=True))
params = [W1, b1, W2, b2]
```
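The parameter shapes chain together as 784 → 256 → 10; printing them makes the wiring explicit (check added by me):

```python
for p in params:
    print(p.shape)
# torch.Size([784, 256]), torch.Size([256]), torch.Size([256, 10]), torch.Size([10])
```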
```python
# Activation function: elementwise max(x, 0)
def relu(x):
    a = torch.zeros_like(x)
    return torch.max(x, a)
```
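For example, negatives are clamped to zero while positives pass through unchanged (demo added by me):

```python
print(relu(torch.tensor([-2.0, -0.5, 0.0, 1.5])))  # tensor([0.0000, 0.0000, 0.0000, 1.5000])
```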
```python
def net(x):
    x = x.reshape((-1, num_inputs))  # flatten each image into a length-784 vector
    h = relu(x @ W1 + b1)            # @ is matrix multiplication
    return h @ W2 + b2
```
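Pushing a dummy minibatch through confirms we get one score per class (sanity check added by me):

```python
print(net(torch.randn(4, 1, 28, 28)).shape)  # torch.Size([4, 10])
```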
```python
# reduction='none' keeps one loss per example; d2l.train_ch3 averages them
# itself and plots the loss curve as training proceeds
loss = nn.CrossEntropyLoss(reduction='none')
```
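Concretely, with reduction='none' the loss has one entry per example rather than a single scalar (illustrative sketch with random logits):

```python
logits, labels = torch.randn(3, 10), torch.tensor([0, 3, 9])
print(loss(logits, labels).shape)  # torch.Size([3])
```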
```python
num_epochs, lr = 10, 0.1
updater = torch.optim.SGD(params, lr=lr)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
d2l.plt.show()
```
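The same d2l module that provides train_ch3 also ships a predict_ch3 helper that shows a few test images with predicted versus true labels; if your d2l version has it, a one-line visual check looks like this:

```python
d2l.predict_ch3(net, test_iter)  # assumes d2l's predict_ch3 helper is available
d2l.plt.show()
```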
## Concise Implementation
```python
import torch
from torch import nn
from d2l import torch as d2l

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
net = nn.Sequential(nn.Flatten(), nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 10))
```
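Stepping a dummy batch through the Sequential container layer by layer shows how Flatten and the two Linear layers reshape the data (inspection added by me):

```python
X = torch.randn(2, 1, 28, 28)
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:', X.shape)
# Flatten (2, 784) -> Linear (2, 256) -> ReLU (2, 256) -> Linear (2, 10)
```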
```python
# Reinitialize every Linear layer's weights from N(0, 0.01^2)
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)

net.apply(init_weights)
```
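A quick check that apply reached both Linear layers: their empirical weight standard deviations should come out near 0.01 (check added by me):

```python
print(net[1].weight.std().item(), net[3].weight.std().item())  # both close to 0.01
```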
```python
num_epochs, lr = 10, 0.1
loss = nn.CrossEntropyLoss(reduction='none')
trainer = torch.optim.SGD(net.parameters(), lr=lr)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
d2l.plt.show()
```