一、什么是感知机?
在处理线性可分的二分类问题中,感知机随处可见。比如按"身高是否超过1.5m"决定儿童是否购票,或者区分男生女生,都可以找到一个标准来准确地分类。在二维平面中,感知机就是一条一次函数(直线),把平面中的样本分成2个类别。对于线性不可分或多分类问题,比如我们可以用softmax回归或者多层感知机来计算属于每个类别的得分。
二、多层感知机
多层感知机在单层神经网络的基础上引入了一到多个隐藏层(hidden layer)。隐藏层位于输入层和输出层之间。
可以看出,它与softmax回归的区别就是中间加了一层隐藏层,并且在每个隐藏层的输出上需要引入一个非线性激活函数(sigmoid, relu, tanh)。如果没有非线性激活,多个线性层的叠加仍然等价于一个线性变换,模型就会"塌陷"成单层网络。
1.引入库和FashionMNIST数据集
import torch
import torchvision
import torchvision.transforms as transforms
# Load the Fashion-MNIST train/test splits as float tensors in [0, 1].
# download=False assumes the dataset already exists under ~/data — TODO confirm.
train_data = torchvision.datasets.FashionMNIST(root='~/data',train=True,download=False,transform=transforms.ToTensor())
test_data = torchvision.datasets.FashionMNIST(root='~/data',train=False,download=False,transform=transforms.ToTensor())
2.设置参数,隐藏层单元设置为256
# Mini-batch size shared by both data loaders.
batch_size = 256
# Shuffle only the training set; keep test order deterministic.
train_iter = torch.utils.data.DataLoader(train_data,batch_size,shuffle=True)
test_iter = torch.utils.data.DataLoader(test_data,batch_size,shuffle=False)
# 784 = 28*28 flattened pixels; 10 clothing classes; 256 hidden units.
num_inputs, num_outputs, num_hiddens = 784, 10, 256
# Weights: small Gaussian init (std 0.01); biases start at zero.
# requires_grad=True so autograd tracks them for manual SGD updates.
W1 = torch.normal(0, 0.01, (num_inputs, num_hiddens), dtype=torch.float,requires_grad=True)
b1 = torch.zeros(num_hiddens, dtype=torch.float,requires_grad=True)
W2 = torch.normal(0, 0.01, (num_hiddens, num_outputs), dtype=torch.float,requires_grad=True)
b2 = torch.zeros(num_outputs, dtype=torch.float,requires_grad=True)
3.激活函数,损失函数,梯度下降,精确度,softmax函数
def relu(X):
    """Elementwise ReLU: returns max(x, 0) for every element of X."""
    zero = torch.tensor(0.0)
    return torch.maximum(X, zero)
def cross_entropy(y_hat, y):
    """Per-sample negative log-likelihood of the true class, scaled by 1/batch.

    y_hat holds per-class probabilities (rows sum to 1); y holds integer labels.
    Summing the result gives the mean cross-entropy over the batch.
    """
    batch = len(y_hat)
    picked = y_hat[torch.arange(batch), y]  # probability assigned to the true class
    return -picked.log() / batch
def linear(x, W1, W2, b1, b2):
    """Forward pass of the one-hidden-layer MLP: relu(x @ W1 + b1) @ W2 + b2.

    x is flattened to (batch, num_inputs) first; returns raw class scores
    (logits) of shape (batch, num_outputs) — softmax is applied by the caller.
    """
    flat = x.view(-1, num_inputs)              # flatten images to (batch, 784)
    hidden = relu(flat.matmul(W1) + b1)        # nonlinear hidden representation
    return hidden.view(-1, num_hiddens).matmul(W2) + b2
def sgd(params, lr):
    """One minibatch SGD step: p <- p - lr * p.grad, then zero each grad.

    Runs under no_grad so the in-place updates are not tracked by autograd.
    """
    with torch.no_grad():
        for p in params:
            p.add_(p.grad, alpha=-lr)
            p.grad.zero_()
def evaluate_accuracy(y_hat, y):
    """Count (not ratio) of rows where the argmax class matches the label."""
    preds = y_hat.argmax(dim=1)
    correct = (preds == y).sum()
    return correct.item()
def softmax(y):
    """Row-wise softmax over class scores of shape (batch, classes).

    Subtracts each row's max before exponentiating — mathematically identical
    (softmax is shift-invariant) but avoids overflow/NaN for large logits,
    which the original `y.exp()` would produce. Also fixes the misspelled
    local variable (`partirion`).
    """
    shifted = y - y.max(dim=1, keepdim=True).values
    y_exp = shifted.exp()
    partition = y_exp.sum(dim=1, keepdim=True)
    return y_exp / partition
4.训练效果
def Train(train_iter, test_iter, lr, w1, w2, b1, b2):
    """Train the one-hidden-layer MLP for 4 epochs and report metrics.

    Fixes over the original:
    - uses the `w1, w2` parameters in the forward pass (the original called
      `linear` with the module-level globals `W1, W2`, silently ignoring the
      arguments);
    - honors the `lr` argument (the original hard-coded 0.1 in the sgd call);
    - measures test accuracy on `test_iter` (the original re-scored the same
      training batch, so "test_acc" always equaled "train_acc");
    - drops the redundant `.sum()` on the already-scalar loss when printing.
    """
    for epoch in range(4):
        train_acc, n = 0.0, 0
        for x, y in train_iter:
            y_hat = softmax(linear(x, w1, w2, b1, b2))
            l = cross_entropy(y_hat, y).sum()  # mean cross-entropy of the batch
            l.backward()
            sgd([w1, w2, b1, b2], lr)
            train_acc += evaluate_accuracy(y_hat, y)
            n += len(y)
        # Evaluate on the held-out test set; no gradients needed here.
        test_acc, m = 0.0, 0
        with torch.no_grad():
            for x, y in test_iter:
                test_acc += evaluate_accuracy(linear(x, w1, w2, b1, b2), y)
                m += len(y)
        print('loss= %.4f, train_acc= %.4f ,test_acc=%.4f' % (l.item(), train_acc / n, test_acc / m))
Train(train_iter,test_iter,0.1,W1,W2,b1,b2)