PyTorch深度学习(B站刘二大人)第九讲 多分类问题
知识点
Numpy中的交叉熵
# Cross entropy computed by hand with NumPy:
# softmax the raw scores, then sum -y * log(y_pred) over the classes.
import numpy as np

y = np.array([1, 0, 0])           # one-hot target: the true class is index 0
z = np.array([0.2, 0.1, -0.1])    # raw logits for the three classes
exp_z = np.exp(z)
y_pred = exp_z / exp_z.sum()      # softmax turns logits into probabilities
loss = (-y * np.log(y_pred)).sum()
print(loss)
Pytorch中的交叉熵
# The same loss via torch.nn.CrossEntropyLoss, which takes raw logits plus
# a class-index target and applies log-softmax internally.
import torch

y = torch.LongTensor([0])               # target: class index 0
z = torch.Tensor([[0.2, 0.1, -0.1]])    # logits, shape (batch=1, classes=3)
criterion = torch.nn.CrossEntropyLoss()
loss = criterion(z, y)
print(loss)
# Compare two batches of predictions against the same targets.
import torch

criterion = torch.nn.CrossEntropyLoss()
Y = torch.LongTensor([2, 0, 1])  # true classes of the three samples

# Each row's largest logit matches its target (2, 0, 1) -> small loss.
Y_pred1 = torch.Tensor([[0.1, 0.2, 0.9],
                        [1.1, 0.1, 0.2],
                        [0.2, 2.1, 0.1]])
# Largest logits point at the wrong classes -> larger loss.
Y_pred2 = torch.Tensor([[0.8, 0.2, 0.3],
                        [0.2, 0.3, 0.5],
                        [0.2, 0.2, 0.5]])

l1 = criterion(Y_pred1, Y)
l2 = criterion(Y_pred2, Y)
print('Batch Loss1 = ', l1.data, '\n Batch Loss2=', l2.data)
NLLLoss
https://pytorch.org/docs/stable/generated/torch.nn.NLLLoss.html#torch.nn.NLLLoss
m = torch.nn.LogSoftmax(dim=1)
loss = torch.nn.NLLLoss()
# input is of size N x C = 3 x 5
input = torch.randn(3, 5, requires_grad=True)
# each element in target has to have 0 <= value < C
target = torch.tensor([1, 0, 4])
output = loss(m(input), target)
output.backward()
# 2D loss example (used, for example, with image inputs)
N, C = 5, 4
loss = torch.nn.NLLLoss()
# input is of size N x C x height x width
data = torch.randn(N, 16, 10, 10)
conv = torch.nn.Conv2d(16, C, (3, 3))
m = torch.nn.LogSoftmax(dim=1)
# each element in target has to have 0 <= value < C
target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, C)
output = loss(m(conv(data)), target)
output.backward()
CrossEntropyLoss
https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html#torch.nn.CrossEntropyLoss
# Example of target with class indices
loss = torch.nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)
output.backward()
# Example of target with class probabilities
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5).softmax(dim=1)
output = loss(input, target)
output.backward()
MNIST 数据集多分类
导入库
import torch
from torchvision import transforms #图像处理
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F #为了使用激活函数
import torch.optim as optim
数据准备
batch_size = 64

# Pipeline applied to every MNIST image:
#  - ToTensor: PIL image with pixel values {0..255} -> float tensor in [0, 1],
#    adding an explicit channel dimension (28x28 -> 1x28x28)
#  - Normalize: standardize with the MNIST-wide mean (0.1307) and std (0.3081)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_dataset = datasets.MNIST(root='../dataset/mnist/', train=True,
                               download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

test_dataset = datasets.MNIST(root='../dataset/mnist/', train=False,
                              download=True, transform=transform)
# No shuffling for evaluation: sample order does not affect accuracy.
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)
模型设计
class Net(torch.nn.Module):
    """Fully-connected classifier for 28x28 MNIST digits: 784 inputs -> 10 logits."""

    def __init__(self):
        super(Net, self).__init__()
        # Five linear layers that gradually shrink 784 features down to 10 class scores.
        self.l1 = torch.nn.Linear(784, 512)
        self.l2 = torch.nn.Linear(512, 256)
        self.l3 = torch.nn.Linear(256, 128)
        self.l4 = torch.nn.Linear(128, 64)
        self.l5 = torch.nn.Linear(64, 10)

    def forward(self, x):
        # Flatten (N, 1, 28, 28) images into (N, 784) rows; -1 infers N.
        x = x.view(-1, 784)
        # ReLU after every hidden layer.
        for hidden in (self.l1, self.l2, self.l3, self.l4):
            x = F.relu(hidden(x))
        # No activation on the output layer: CrossEntropyLoss applies
        # log-softmax itself, so raw logits are returned.
        return self.l5(x)


model = Net()
损失函数和优化器
# Cross entropy combines LogSoftmax and NLLLoss, so the net's last layer stays linear.
criterion = torch.nn.CrossEntropyLoss()
# SGD with momentum 0.5 to help push through saddle points and local minima.
optimizer = optim.SGD(model.parameters(), momentum=0.5, lr=0.01)
训练和测试
def train(epoch):
    """Run one full pass over train_loader; print the average loss every 300 batches."""
    running_loss = 0.0
    for batch_idx, (inputs, target) in enumerate(train_loader):
        optimizer.zero_grad()  # gradients accumulate by default, so reset first
        # forward + backward + parameter update
        outputs = model(inputs)            # logits of shape (batch, 10)
        loss = criterion(outputs, target)  # cross entropy vs class-index targets
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # batch_idx is 0-based, so this fires on batches 300, 600, ...
        if batch_idx % 300 == 299:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0
def test():
    """Evaluate the model on test_loader and print the overall accuracy."""
    correct = 0  # predictions that match the label
    total = 0    # samples seen so far
    with torch.no_grad():  # inference only: no gradients needed
        for images, labels in test_loader:
            outputs = model(images)
            # argmax over dim=1 (the class dimension); the max values themselves
            # are discarded into `_`, only the indices matter
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)  # add this batch's sample count
            correct += (predicted == labels).sum().item()  # count exact matches
    print('测试集准确率:%d %%' % (100 * correct / total))
if __name__ == '__main__':
    # Ten epochs of training, evaluating on the test set after each one.
    for epoch_num in range(10):
        train(epoch_num)
        test()