PyTorch Deep Learning Notes
I. Getting Started
1. PyTorch Deep Learning
Reference video: 1. PyTorch深度学习快速入门教程 (PyTorch Deep Learning Quick Start Tutorial; "absolutely easy to follow!") by 小土堆
Reference notes: 2. Notes by 电信保温杯
1.1 Loading Data in PyTorch: the Dataset Class
## Use in the Python Console
from torch.utils.data import Dataset
help(Dataset)  # print the docstring of Dataset
# Reproducing 小土堆's example
# Loading data in PyTorch
# The Dataset class
import cv2
from torch.utils.data import Dataset
import os

class MyData(Dataset):
    def __init__(self, root_dir, label_dir):
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(self.root_dir, self.label_dir)
        self.img_path = os.listdir(self.path)  # list of file/folder names in the directory, e.g. ['000000.jpg', '000003.jpg', '000005.jpg']

    def __getitem__(self, index):
        img_name = self.img_path[index]
        img_item_path = os.path.join(self.path, img_name)
        img = cv2.imread(img_item_path)
        label = self.label_dir
        return img, label

    def __len__(self):
        return len(self.img_path)

root_dir = r"E:\aMySoftware\PyCharm\Write\YOLOv5\myYOLO\dataset"
label_dir = r"hat"
MY = MyData(root_dir, label_dir)
img, label = MY[0]
cv2.imshow('image', img)  # cv2.imshow returns None, so there is nothing useful to print
cv2.waitKey(0)
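Since MyData subclasses Dataset, two instances can be concatenated with +, which builds a torch.utils.data.ConcatDataset under the hood. A minimal sketch, assuming a second, hypothetical label folder "person" sits next to "hat":

hat_dataset = MyData(root_dir, "hat")
person_dataset = MyData(root_dir, "person")  # hypothetical second class folder

# Dataset.__add__ returns a ConcatDataset that chains the two datasets end to end
train_dataset = hat_dataset + person_dataset
print(len(train_dataset))  # len(hat_dataset) + len(person_dataset)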
1.2 Using TensorBoard
import cv2
from torch.utils.tensorboard import SummaryWriter

write = SummaryWriter("logs")
# 1. add_scalar()
# add_scalar(tag, scalar_value, global_step=None, walltime=None)
# scalar_value is the y-axis, global_step is the x-axis
for i in range(100):
    write.add_scalar("y=2x", 2 * i, i)
# 2. add_image()
img = cv2.imread(r"E:\aMySoftware\PyCharm\Write\YOLOv5\myYOLO\dataset\hat\000008.jpg")  # numpy.ndarray
# print(img.shape)  # (400, 308, 3)
write.add_image("test", img, 0, dataformats='HWC')  # img_tensor may also be a numpy.ndarray
write.close()
TensorBoard: viewing the computation graph, and what to do when data fails to load.
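To actually view the logs, start TensorBoard from a terminal and point it at the log directory, e.g. `tensorboard --logdir=logs --port=6006`, then open the printed URL in a browser. When old and new runs mix and the charts look wrong (the "data fails to load" problem above), deleting the logs folder and rerunning is the usual fix.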
1.3 Image Transformations: Using Transforms
torchvision.transforms is PyTorch's image preprocessing package.
# PIL
from PIL import Image
img = Image.open(r"E:\aMySoftware\PyCharm\Write\YOLOv5\myYOLO\dataset\hat\000008.jpg")
print(img)  # <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=308x400 at 0x14F42953250>
# cv2
import cv2
from torchvision import transforms
img = cv2.imread(r"E:\aMySoftware\PyCharm\Write\YOLOv5\myYOLO\dataset\hat\000008.jpg")
# the tensor data type
# via transforms.ToTensor
totensor = transforms.ToTensor()
tensor_img = totensor(img)
print(tensor_img)
Why do we need the Tensor data type?
Because a Tensor wraps the machinery a neural network needs during training: the data itself plus gradient information (requires_grad, grad, grad_fn), the device it lives on, and so on.
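A minimal sketch of those attributes on a plain tensor:

import torch

t = torch.tensor([1.0, 2.0], requires_grad=True)
print(t.dtype)          # torch.float32
print(t.device)         # cpu (or cuda:0 after moving it to a GPU)
print(t.requires_grad)  # True: operations on t will be tracked
(t * 2).sum().backward()
print(t.grad)           # tensor([2., 2.]): d(sum(2t))/dt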
# combining with the TensorBoard material above
import cv2
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter

write = SummaryWriter("logs")
img = cv2.imread(r"E:\aMySoftware\PyCharm\Write\YOLOv5\myYOLO\dataset\hat\000008.jpg")
totensor = transforms.ToTensor()
tensor_img = totensor(img)
write.add_image("tensor_img", tensor_img, 0)  # img_tensor may be a torch.Tensor
write.close()
1.3.1 Common Transforms
from PIL import Image
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter

write = SummaryWriter("logs")
img = Image.open(r"E:\aMySoftware\PyCharm\Write\YOLOv5\myYOLO\dataset\hat\000008.jpg")
# 1. ToTensor()
totensor = transforms.ToTensor()
tensor_img = totensor(img)
# 2. Normalize(): normalize each channel with a mean and standard deviation
# output[channel] = (input[channel] - mean[channel]) / std[channel]
# e.g. (input - 0.5) / 0.5 = 2 * input - 1,
# so input in [0, 1] → result in [-1, 1]
trans_norm = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
norm_img = trans_norm(tensor_img)
write.add_image("norm_img", norm_img, 1)
# 3. Resize
print(img.size)  # (308, 400)
trans_resize = transforms.Resize((512, 512))
resize_img = trans_resize(img)
print(resize_img)  # <PIL.Image.Image image mode=RGB size=512x512 at 0x27367D2C790>
# 4. Compose
# Composes several transforms together, e.g.:
# >>> transforms.Compose([
# >>>     transforms.CenterCrop(10),
# >>>     transforms.PILToTensor(),
# >>>     transforms.ConvertImageDtype(torch.float),
# >>> ])
trans_compose = transforms.Compose([trans_resize, totensor])
compose_img = trans_compose(img)
write.add_image("compose_img", compose_img, 2)
# 5. RandomCrop: crop at a random position
trans_random = transforms.RandomCrop(200)
trans_compose_1 = transforms.Compose([trans_random, totensor])
for i in range(10):
    crop_img = trans_compose_1(img)
    write.add_image("crop_img", crop_img, i)
write.close()
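A quick numeric check of the Normalize formula above, on a hand-made single-channel tensor:

import torch
from torchvision import transforms

t = torch.tensor([[[0.0, 0.5, 1.0]]])      # shape (1, 1, 3): one channel
norm = transforms.Normalize([0.5], [0.5])  # (x - 0.5) / 0.5 = 2x - 1
print(norm(t))                             # tensor([[[-1., 0., 1.]]])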
1.4 Using the Datasets in torchvision
import torchvision

train_set = torchvision.datasets.CIFAR10(root="./dataset1", train=True, download=True)
test_set = torchvision.datasets.CIFAR10(root="./dataset1", train=False, download=True)
print(test_set)
print(test_set.classes)  # ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# the first image of the test set and its target
print(test_set[0])  # (<PIL.Image.Image image mode=RGB size=32x32 at 0x2DA89164E50>, 3)
img, target = test_set[0]
print(img)  # <PIL.Image.Image image mode=RGB size=32x32 at 0x2DA89164E50>
print(target)  # 3
print(test_set.classes[target])  # cat
img.show()  # show returns None, so wrapping it in print is pointless
# combining dataset and transform
import torchvision

dataset_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])
train_set = torchvision.datasets.CIFAR10(root="./dataset1", train=True, transform=dataset_transform, download=True)
test_set = torchvision.datasets.CIFAR10(root="./dataset1", train=False, transform=dataset_transform, download=True)
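With the transform attached, indexing the dataset now returns a tensor instead of a PIL image; a quick check:

img, target = test_set[0]
print(type(img))  # <class 'torch.Tensor'>
print(img.shape)  # torch.Size([3, 32, 32])
print(target)     # 3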
1.5 Using DataLoader
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

test_set = torchvision.datasets.CIFAR10(root="./dataset1", train=False, transform=torchvision.transforms.ToTensor())
test_loader = DataLoader(dataset=test_set, batch_size=64, shuffle=True, num_workers=0, drop_last=False)
# img, target = test_set[0]
writer = SummaryWriter("logs")
step = 0
for data in test_loader:
    imgs, targets = data
    writer.add_images("imgs", imgs, step)
    step = step + 1
writer.close()
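Each iteration of the loader yields one batch: the images stacked along a new batch dimension, plus the matching targets. A quick look at a single batch:

imgs, targets = next(iter(test_loader))
print(imgs.shape)     # torch.Size([64, 3, 32, 32]): batch_size=64
print(targets.shape)  # torch.Size([64]): one label per image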
1.6 Building a Neural Network
torch.nn
1.6.1 The Basic Skeleton of a Network: Using nn.Module
Containers
import torch
from torch import nn

class Model(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        x = x + 1
        return x

model = Model()
input = torch.tensor(1.0)
output = model(input)
print(output)
# output:
# tensor(2.)
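Note that the model is invoked as model(input) rather than model.forward(input): nn.Module.__call__ runs any registered hooks and then dispatches to forward, so calling the module directly is the intended entry point.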
1.6.2 Neural Networks: Convolution Layers
Convolution Layers
dilation: dilated (atrous) convolution
import torch
from torch import nn
import torchvision
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

test_set = torchvision.datasets.CIFAR10(root="./dataset1", train=False, transform=torchvision.transforms.ToTensor())
test_loader = DataLoader(dataset=test_set, batch_size=64, shuffle=True, num_workers=0, drop_last=False)

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        x = self.conv1(x)
        return x

model = Model()
print(model)
# Model(
#   (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
# )
writer = SummaryWriter("logs")
step = 0
for data in test_loader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    output = model(imgs)
    print(imgs.shape)    # torch.Size([64, 3, 32, 32]): batch_size=64, in_channels=3
    print(output.shape)  # torch.Size([64, 6, 30, 30]): out_channels=6
    # [64, 6, 30, 30] → [xxx, 3, 30, 30] so add_images can treat it as 3-channel images
    output = torch.reshape(output, (-1, 3, 30, 30))
    print(output.shape)  # torch.Size([128, 3, 30, 30])
    writer.add_images("output", output, step)
    step = step + 1
writer.close()
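Why 32 → 30? The Conv2d output size follows H_out = floor((H_in + 2·padding − dilation·(kernel_size − 1) − 1) / stride) + 1; checking it for the conv1 layer above:

# Conv2d output size for conv1 above
h_in, k, s, p, d = 32, 3, 1, 0, 1
h_out = (h_in + 2 * p - d * (k - 1) - 1) // s + 1
print(h_out)  # 30, matching torch.Size([64, 6, 30, 30])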
vgg16 model: (the original notes show the VGG-16 architecture diagram here)
1.6.3 Neural Networks: Using Max Pooling
Pooling layers
Max pooling (MaxPool) → downsampling; MaxUnpool → upsampling
torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
import torch
from torch import nn
from torch.nn import MaxPool2d

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
input = torch.reshape(input, (-1, 1, 5, 5))
print(input.shape)  # torch.Size([1, 1, 5, 5])

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.maxpool = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        output = self.maxpool(input)
        return output

model = Model()
output = model(input)
print(output)
## with ceil_mode=True (partial windows at the edges are kept):
# tensor([[[[2., 3.],
#           [5., 1.]]]])
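For comparison, with the default ceil_mode=False the partial windows at the right and bottom edges are dropped; continuing with the same input tensor:

maxpool_floor = MaxPool2d(kernel_size=3, ceil_mode=False)
print(maxpool_floor(input))  # tensor([[[[2.]]]]): only the full top-left 3x3 window remains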
1.6.4 Neural Networks: Non-linear Activations
Non-linear Activations (weighted sum, nonlinearity)
inplace=False: do not modify the input tensor in place (the default)
import torch
from torch import nn
from torch.nn import ReLU

input = torch.tensor([[1, -0.5],
                      [-1, 3]], dtype=torch.float32)
input = torch.reshape(input, (-1, 1, 2, 2))
print(input.shape)  # torch.Size([1, 1, 2, 2])

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.relu = ReLU()

    def forward(self, input):
        output = self.relu(input)
        return output

model = Model()
output = model(input)
print(output)
# tensor([[[[1., 0.],
#           [0., 3.]]]])
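The same skeleton works for any other activation; swapping in Sigmoid, which maps each element to 1 / (1 + e^(-x)), as a quick sketch on the same input:

from torch.nn import Sigmoid

sigmoid = Sigmoid()
print(sigmoid(input))
# tensor([[[[0.7311, 0.3775],
#           [0.2689, 0.9526]]]])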
1.6.5 Neural Networks: Linear Layers and Other Layers
Normalization layers
Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
Linear Layers
import torch
import torchvision
from torch import nn, flatten
from torch.nn import Linear
from torch.utils.data import DataLoader

test_set = torchvision.datasets.CIFAR10(root="./dataset1", train=False, transform=torchvision.transforms.ToTensor())
# drop_last=True: a final partial batch would not match Linear(196608, 10) below
test_loader = DataLoader(dataset=test_set, batch_size=64, shuffle=True, num_workers=0, drop_last=True)
m = Linear(196608, 10)  # 196608 = 64 * 3 * 32 * 32
for data in test_loader:
    imgs, targets = data
    print(imgs.shape)  # torch.Size([64, 3, 32, 32])
    output = torch.reshape(imgs, [1, 1, 1, -1])
    print(output.shape)  # torch.Size([1, 1, 1, 196608])
    output = m(output)
    print(output.shape)  # torch.Size([1, 1, 1, 10])
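torch.flatten (imported above but unused) collapses a tensor to one dimension without hand-computing the shape; a self-contained sketch with a dummy batch:

import torch
from torch.nn import Linear

imgs = torch.ones(64, 3, 32, 32)  # stand-in for one full batch
output = torch.flatten(imgs)      # torch.Size([196608]): no shape arithmetic needed
m = Linear(196608, 10)
print(m(output).shape)            # torch.Size([10])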
1.6.6 Neural Networks: A Small Hands-on Build and Using Sequential
Model to build (the original notes show the CIFAR-10 model architecture diagram here):
import torch
from torch import nn

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 5, padding=2)
        self.maxpool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(32, 32, 5, padding=2)
        self.maxpool2 = nn.MaxPool2d(2)
        self.conv3 = nn.Conv2d(32, 64, 5, padding=2)
        self.maxpool3 = nn.MaxPool2d(2)
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(1024, 64)  # 1024 = 64 channels * 4 * 4 after the three pools
        self.linear2 = nn.Linear(64, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.maxpool2(x)
        x = self.conv3(x)
        x = self.maxpool3(x)
        x = self.flatten(x)
        x = self.linear1(x)
        x = self.linear2(x)
        return x

model = Model()
print(model)
input = torch.ones((64, 3, 32, 32))
output = model(input)
print(output.shape)  # torch.Size([64, 10])
Compared with calling a series of modules by hand, the value Sequential provides is that it lets you treat the whole container as a single module: a transformation applied to the Sequential is applied to each module it stores (each one is a registered submodule of the Sequential).
import torch
from torch import nn
from torch.utils.tensorboard import SummaryWriter

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x

model = Model()
print(model)
input = torch.ones((64, 3, 32, 32))
output = model(input)
print(output.shape)  # torch.Size([64, 10])
writer = SummaryWriter("logs")
writer.add_graph(model, input)
writer.close()
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader

test_set = torchvision.datasets.CIFAR10(root="./dataset1", train=False, transform=torchvision.transforms.ToTensor())
test_loader = DataLoader(dataset=test_set, batch_size=1)

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x

model = Model()
for data in test_loader:
    imgs, targets = data
    output = model(imgs)
    print(output)
    # tensor([[ 0.1158, -0.0067,  0.1664, -0.1014, -0.0459,  0.1037, -0.0885,  0.0335,
    #          -0.1372,  0.0528]], grad_fn=<AddmmBackward0>)
    print(targets)
    # tensor([3])
1.6.7 Loss Functions and Backpropagation
import torch
from torch import nn
input = torch.tensor([1, 2, 3], dtype=torch.float32)
input = torch.reshape(input, (1, 1, 1, 3))
target = torch.tensor([1, 2, 5], dtype=torch.float32)
target = torch.reshape(target, (1, 1, 1, 3))
loss = nn.L1Loss()
result = loss(input, target)
print(result) # tensor(0.6667)
loss_mse = nn.MSELoss()
result_mse = loss_mse(input, target)
print(result_mse) # tensor(1.3333)
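Checking by hand: the element-wise differences between input and target are (0, 0, 2), so L1Loss gives (0 + 0 + 2) / 3 ≈ 0.6667 and MSELoss gives (0² + 0² + 2²) / 3 ≈ 1.3333, matching the printed values.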
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader

test_set = torchvision.datasets.CIFAR10(root="./dataset1", train=False, transform=torchvision.transforms.ToTensor())
test_loader = DataLoader(dataset=test_set, batch_size=1)

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x

loss = nn.CrossEntropyLoss()
model = Model()
for data in test_loader:
    imgs, targets = data
    output = model(imgs)
    print(output)
    # tensor([[ 0.1158, -0.0067,  0.1664, -0.1014, -0.0459,  0.1037, -0.0885,  0.0335,
    #          -0.1372,  0.0528]], grad_fn=<AddmmBackward0>)
    print(targets)
    # tensor([3])
    result_loss = loss(output, targets)
    print(result_loss)
    # tensor(2.2721, grad_fn=<NllLossBackward0>)
    result_loss.backward()  # backpropagate to populate the parameters' grad
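CrossEntropyLoss combines LogSoftmax and NLLLoss: for logits x and target class c, loss = -log(exp(x[c]) / Σⱼ exp(x[j])). A quick sketch verifying this on a tiny example:

import torch
from torch import nn

x = torch.tensor([[0.1, 0.2, 0.3]])
target = torch.tensor([1])
print(nn.CrossEntropyLoss()(x, target))    # tensor(1.1019)
print(-torch.log_softmax(x, dim=1)[0, 1])  # tensor(1.1019): same value by hand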
1.6.8 Optimizers (I)
torch.optim
# To construct an optimizer, give it an iterable containing the parameters to optimize
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam([var1, var2], lr=0.0001)
# the standard training step:
for input, target in dataset:
    optimizer.zero_grad()  # clear the gradients computed in the previous step
    output = model(input)
    loss = loss_fn(output, target)
    loss.backward()        # backpropagate the loss to compute gradients
    optimizer.step()       # update the parameters using the gradients
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader

test_set = torchvision.datasets.CIFAR10(root="./dataset1", train=False, transform=torchvision.transforms.ToTensor())
test_loader = DataLoader(dataset=test_set, batch_size=1)

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x

model = Model()
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for epoch in range(20):
    running_loss = 0.0
    for data in test_loader:
        imgs, targets = data
        output = model(imgs)
        result_loss = loss(output, targets)
        optimizer.zero_grad()
        result_loss.backward()
        optimizer.step()
        running_loss = running_loss + result_loss.item()  # .item() avoids keeping the autograd graph alive
    print(running_loss)
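Not covered in the original notes, but worth knowing: torch.optim also provides learning-rate schedulers that shrink lr as training progresses. A minimal sketch with StepLR, using a toy parameter as a stand-in for a real model:

import torch
from torch import nn

param = nn.Parameter(torch.zeros(1))  # toy parameter standing in for model.parameters()
optimizer = torch.optim.SGD([param], lr=0.01)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
for epoch in range(20):
    optimizer.step()   # normally preceded by zero_grad() and backward() per batch
    scheduler.step()   # once per epoch, after the optimizer steps
    print(epoch, optimizer.param_groups[0]['lr'])  # lr drops 10x every 5 epochs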
1.6.9 Using and Modifying Existing Network Models
Only well-known architectures ship as ready-made implementations (e.g. in torchvision.models).
import torchvision
from torch import nn

vgg16_false = torchvision.models.vgg16(pretrained=False)
vgg16_true = torchvision.models.vgg16(pretrained=True)  # if True, returns a model pre-trained on ImageNet
print(vgg16_true)
# append a new layer to the classifier
vgg16_true.classifier.add_module("add_linear", nn.Linear(4096, 10))
print(vgg16_true)
# or replace an existing layer in place
vgg16_false.classifier[6] = nn.Linear(4096, 10)
print(vgg16_false)
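A note on newer torchvision releases (0.13 and later): the pretrained flag is deprecated there in favor of an explicit weights argument; a sketch of the equivalent calls:

from torchvision.models import vgg16, VGG16_Weights

vgg16_true = vgg16(weights=VGG16_Weights.IMAGENET1K_V1)  # pre-trained on ImageNet
vgg16_false = vgg16(weights=None)                        # random initialization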
1.6.10 Saving and Loading Network Models
import torch
import torchvision

vgg16 = torchvision.models.vgg16(pretrained=False)
# Save method 1: model structure + parameters
torch.save(vgg16, "vgg16_method1.pth")
# Method 1 → load the model back
model1 = torch.load("vgg16_method1.pth")
print(model1)
# Save method 2: parameters only (officially recommended)
torch.save(vgg16.state_dict(), "vgg16_method2.pth")
# Method 2 → what comes back is a state dict (an OrderedDict of tensors), not a model
model2 = torch.load("vgg16_method2.pth")
print(model2)
vgg16.load_state_dict(model2)
print(vgg16)
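One trap with method 1: torch.load reconstructs the object via pickle, so for a custom model the class definition must be importable in the loading script (define it there or import it, e.g. from model import *). Method 2 avoids this, since it stores only the parameter tensors and you rebuild the model yourself before calling load_state_dict.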
1.6.11 A Complete Training Run
Accuracy:
import torch

outputs = torch.tensor([[0.1, 0.2],
                        [0.3, 0.4]])
preds = outputs.argmax(1)  # dim=1: argmax across each row
print(preds)               # tensor([1, 1])
targets = torch.tensor([0, 1])
print((preds == targets).sum())  # tensor(1): one correct prediction
Full training code:
model.py
import torch
from torch import nn

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x

if __name__ == '__main__':
    model = Model()
    # sanity-check that the network produces the expected output shape
    input = torch.ones((64, 3, 32, 32))
    output = model(input)
    print(output.shape)  # torch.Size([64, 10])
train.py
import torch
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import *

# prepare the datasets
train_data = torchvision.datasets.CIFAR10(root="repetition/dataset1", train=True,
                                          transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root="repetition/dataset1", train=False,
                                         transform=torchvision.transforms.ToTensor(),
                                         download=True)
train_data_size = len(train_data)
test_data_size = len(test_data)
print("Size of the training set: {}".format(train_data_size))
print("Size of the test set: {}".format(test_data_size))
# load the datasets with DataLoader
train_dataLoader = DataLoader(dataset=train_data, batch_size=64)
test_dataLoader = DataLoader(dataset=test_data, batch_size=64)
# build the network
model = Model()
# loss function
loss_fn = nn.CrossEntropyLoss()
# optimizer
learning_rate = 1e-2  # 1e-2 = 1 * 10^(-2) = 1/100 = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# bookkeeping for the training process
# counters for training and test steps
total_train_step = 0
total_test_step = 0
# number of epochs
epoch = 10
# add TensorBoard
write = SummaryWriter("logs")
for i in range(epoch):
    print("--------- epoch {} starts ---------".format(i + 1))
    # training phase
    model.train()
    for data in train_dataLoader:
        imgs, targets = data
        output = model(imgs)
        loss = loss_fn(output, targets)
        # optimize the model
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_step = total_train_step + 1
        if total_train_step % 100 == 0:
            print("step: {}, loss: {}".format(total_train_step, loss.item()))
            write.add_scalar("train_loss", loss.item(), total_train_step)
    # evaluation phase
    model.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():  # disable gradient tracking: no tuning during evaluation
        for data in test_dataLoader:
            imgs, targets = data
            output = model(imgs)
            loss = loss_fn(output, targets)
            total_test_loss = total_test_loss + loss.item()
            # accuracy
            accuracy = (output.argmax(1) == targets).sum()
            total_accuracy = total_accuracy + accuracy
    print("loss on the whole test set: {}".format(total_test_loss))
    accuracy_rate = total_accuracy / test_data_size
    print("accuracy on the whole test set: {}".format(accuracy_rate))
    write.add_scalar("test_loss", total_test_loss, total_test_step)
    write.add_scalar("test_accuracy", accuracy_rate, total_test_step)
    total_test_step = total_test_step + 1
    torch.save(model, "model_{}.pth".format(i))
    print("model saved")
write.close()
1.6.12 Training on a GPU
Use either of the following patterns for GPU acceleration:
# Method 1: call .cuda() on the model (the loss function and data tensors take the same call)
model = Model()
if torch.cuda.is_available():
    model = model.cuda()
# Method 2: move objects to a torch.device with .to()
device = torch.device("cuda")
model = Model()
model = model.to(device)
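Moving the model alone is not enough: the inputs, targets, and (optionally) the loss function must end up on the same device. A sketch of the full pattern, reusing Model and train_dataLoader from the training script in section 1.6.11 and falling back to CPU when no GPU is present:

import torch
from torch import nn
from model import *

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Model().to(device)
loss_fn = nn.CrossEntropyLoss().to(device)
for data in train_dataLoader:
    imgs, targets = data
    imgs = imgs.to(device)      # move each batch to the same device as the model
    targets = targets.to(device)
    output = model(imgs)
    loss = loss_fn(output, targets)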
1.6.13 Complete Model Validation