读取数据集(Dataset类)
from torch.utils.data import Dataset
from PIL import Image
import os
class MyData(Dataset):
    """Image-folder dataset: ``root_dir/label_dir`` holds the images of one
    class, and the folder name itself doubles as the label."""

    def __init__(self, root_dir, label_dir):
        # Keep both path pieces so __getitem__ can rebuild full image paths,
        # and so they stay available to every other method.
        self.root_dir = root_dir
        self.label_dir = label_dir
        # Directory that actually contains the image files
        self.path = os.path.join(self.root_dir, self.label_dir)
        # File names of all images in that directory
        self.img_path = os.listdir(self.path)

    def __getitem__(self, index):
        # Look up the index-th file name and open it as a PIL image
        # (the PIL object carries size/mode metadata).
        img_name = self.img_path[index]
        img_item_path = os.path.join(self.root_dir, self.label_dir, img_name)
        img = Image.open(img_item_path)
        # The containing folder's name is the label
        label = self.label_dir
        return img, label

    def __len__(self):
        # Number of images found in the folder
        return len(self.img_path)
# Build one dataset per class folder; the folder name is the label.
root_dir = "dataset/train"
ants_label_dir = "ants"
bees_label_dir = "bees"
ants_dataset = MyData(root_dir, ants_label_dir)
bees_dataset = MyData(root_dir, bees_label_dir)
# Dataset supports "+": concatenates the two datasets into one combined dataset.
train_dataset = ants_dataset + bees_dataset
标签有时候应该保存在一个txt文件里面
TensorBoard的使用
SummaryWriter
add_scalar()的使用
from torch.utils.tensorboard import SummaryWriter
# Event files are written under the "logs" directory.
writer = SummaryWriter("logs")
for step in range(100):
    # args: tag (chart title), scalar value (y-axis), global step (x-axis)
    writer.add_scalar("y=2x", 2 * step, step)
writer.close()
如何打开事件文件
默认打开的端口是6006端口,为了防止和别人抢占同一个端口,可以自己指定一个端口进行使用
在python终端输入:tensorboard --logdir=logs --port=6007
add_image()的使用
from torch.utils.tensorboard import SummaryWriter
from PIL import Image
import numpy as np
writer = SummaryWriter("logs")
# image_path = "dataset/train/ants/0013035.jpg"
image_path = "dataset/train/bees/16838648_415acd9e3f.jpg"
# Opened as a PIL image
img_PIL = Image.open(image_path)
# Convert to numpy: add_image accepts ndarray/tensor, not PIL
image_array = np.array(img_PIL)
print(type(image_array)) # <class 'numpy.ndarray'>
# writer.add_image("test", image_array, 1, dataformats='HWC')
# dataformats='HWC' tells TensorBoard what the three axes mean
# (height, width, channel); without it an HWC array raises an error.
writer.add_image(tag="test", img_tensor=image_array, global_step=2, dataformats='HWC')
writer.close()
打开事件文件的方式同上:
在python终端输入:tensorboard --logdir=logs --port=6007
global_step可以在一个标签下展示多张图片,在浏览器上可以拖动查看
展示多张图片可以通过设置不同的tag
Transforms的使用
transform的结构
transform就是一个写好的python文件(transform.py),里面有很多的类,可以用来处理图片对象
ToTensor类的使用
from torchvision import transforms
from PIL import Image
img_path = "dataset/train/ants/0013035.jpg"
img = Image.open(img_path)
# ToTensor is a class: instantiate it first, then call the instance
tensor_trans = transforms.ToTensor()
# Calling the instance converts the PIL image to a torch tensor
tensor_img = tensor_trans(img)
print(tensor_img)
使用SummaryWriter展示tensor
为什么要使用tensor数据类型?
tensor数据类型包含了神经网络的一些常用参数,在训练的时候会使用到
from torchvision import transforms
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
img_path = "dataset/train/ants/0013035.jpg"
img = Image.open(img_path)
# Create a ToTensor instance
tensor_trans = transforms.ToTensor()
# Convert the PIL image to a torch tensor
tensor_img = tensor_trans(img)
writer = SummaryWriter("logs")
# A tensor can be logged directly (no dataformats needed)
writer.add_image("Tensor_img", tensor_img)
writer.close()
Normalize类的使用
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
writer = SummaryWriter("logs")
img = Image.open("dataset/train/bees/16838648_415acd9e3f.jpg")
# ToTensor: PIL -> tensor
trans_tensor = transforms.ToTensor()
img_tensor = trans_tensor(img)
writer.add_image(tag="Tensor", img_tensor=img_tensor)
# Normalize applies output = (input - mean) / std per channel
print(img_tensor[0][0][0])
trans_norm = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
img_norm = trans_norm(img_tensor)
print(img_norm[0][0][0])
writer.add_image("Normalize", img_norm, global_step=1)
writer.close()
通过把均值(mean)和标准差(std)都设置为0.5,可以把数据从[0,1]转到[-1,1]这个区间
Resize类的使用
接收的输入的类型为PIL,resize以后的输出也是PIL类型,在浏览器展示,需要转换为tensor类型
from PIL import Image
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter
# Resize demo: transforms.Resize takes a PIL image and returns a PIL image,
# so it must be converted to a tensor before TensorBoard can display it.
img_path = "dataset/train/ants/0013035.jpg"
img = Image.open(img_path)
# Resize to a fixed 512x512
trans_resize = transforms.Resize((512, 512))
# img (PIL) -> Resize -> img_resize (PIL)
img_resize = trans_resize(img)
print(img_resize) # <PIL.Image.Image image mode=RGB size=512x512 at 0x180B1A586D0>
trans_tensor = transforms.ToTensor()
# img_resize (PIL) -> ToTensor -> img_resize (tensor)
img_resize = trans_tensor(img_resize)
print(img_resize)
writer = SummaryWriter("logs")
writer.add_image(tag="Resize", img_tensor=img_resize, global_step=0)
# Fix: the original never closed the writer, so the event file was left open
# and pending events might not be flushed to disk.
writer.close()
Compose类的使用
Compose类的参数是一个列表,列表前一个参数的输出作为下一个参数的输入,要注意上一个参数的输出类型是否对应下一个参数的输入类型
from PIL import Image
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter
img_path = "dataset/train/ants/0013035.jpg"
img = Image.open(img_path)
trans_tensor = transforms.ToTensor()
trans_resize = transforms.Resize((512, 512))
# Compose chains transforms: each item's output feeds the next item's input,
# so Resize (PIL -> PIL) must come before ToTensor (PIL -> tensor).
trans_compose = transforms.Compose([trans_resize, trans_tensor])
img_resize = trans_compose(img)
writer = SummaryWriter("logs")
writer.add_image(tag="Resize", img_tensor=img_resize, global_step=0)
# Log the untouched original for comparison
img_tensor = trans_tensor(img)
writer.add_image(tag="原图", img_tensor=img_tensor, global_step=0)
writer.close()
RandomCrop类的使用
from PIL import Image
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter
# RandomCrop picks a random 512x512 region on every call,
# so ten different crops get logged under the same tag.
img_path = "dataset/train/ants/0013035.jpg"
img = Image.open(img_path)
trans_random = transforms.RandomCrop((512, 512))
trans_tensor = transforms.ToTensor()
trans_compose = transforms.Compose([trans_random, trans_tensor])
writer = SummaryWriter("logs")
for i in range(10):
    img_random = trans_compose(img)
    writer.add_image(tag="Random", img_tensor=img_random, global_step=i)
writer.close()
torchvision中数据集的使用
import torchvision
from torch.utils.tensorboard import SummaryWriter
# Passing transform=... makes every sample come back as a tensor;
# without it the CIFAR10 samples are returned as PIL images.
dataset_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])
train_set = torchvision.datasets.CIFAR10(root="./dataset", train=True, transform=dataset_transform, download=True)
test_set = torchvision.datasets.CIFAR10(root="./dataset", train=False, transform=dataset_transform, download=True)
# Log the first ten test images to TensorBoard.
writer = SummaryWriter("logs")
for idx in range(10):
    img, target = test_set[idx]
    writer.add_image("test", img_tensor=img, global_step=idx)
writer.close()
DataLoader类的使用
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
test_data = torchvision.datasets.CIFAR10(root="./dataset", train=False, transform=torchvision.transforms.ToTensor())
# batch_size: images loaded per batch
# shuffle: reshuffle the order on each pass
# num_workers: worker subprocesses for loading (0 = load in the main process)
# drop_last: whether to drop the final batch when it has fewer than batch_size images
test_loader = DataLoader(dataset=test_data, batch_size=64, shuffle=True, num_workers=0, drop_last=False)
writer = SummaryWriter("dataloader")
for epoch in range(2):
    step = 0
    for imgs, targets in test_loader:
        # add_images (plural) logs a whole batch at once
        writer.add_images("Epoch:{}".format(epoch), img_tensor=imgs, global_step=step)
        step += 1
writer.close()
指定shuffle为True和False时
指定drop_last为True和False时
神经网络的基本骨架--nn.Module的使用
在nn.Module类中,它的__call__函数调用了forward函数,所以这里不用写mymodule.forward(x),也会去自动调用forward函数
属性赋值:
__call__ = _call_impl
这部分是将一个名为 _call_impl 的方法赋值给 __call__ 属性。
这意味着 __call__ 现在引用了 _call_impl 的实现,
因此直接调用模块对象时会执行 _call_impl,进而调用 forward。
from torch import nn
import torch
class MyModule(nn.Module):
    """Minimal nn.Module whose forward simply adds 1 to its input."""

    def __init__(self):
        super().__init__()

    def forward(self, input):
        # nn.Module.__call__ dispatches here, so mymodule(x) runs forward(x)
        output = input + 1
        return output


mymodule = MyModule()
x = torch.tensor(1.0)
# Calling the module object invokes forward via nn.Module.__call__
output = mymodule(x)
print(output)
卷积操作(conv)
import torch
import torch.nn.functional as F
# Demonstrate F.conv2d: slide a 3x3 kernel over a 5x5 input.
# Fix 1: use float32 — the CPU conv2d kernel is not implemented for
#        integer (Long) tensors, so the original could raise at runtime.
# Fix 2: the original ended with print(+output3); the stray unary '+'
#        was a typo (it is a no-op on tensors but misleading).
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]], dtype=torch.float32)
# conv2d requires 4-D tensors: (minibatch, channels, H, W)
input = torch.reshape(input, shape=(1, 1, 5, 5))
kernel = torch.reshape(kernel, shape=(1, 1, 3, 3))
print(input.shape)
print(kernel.shape)
output1 = F.conv2d(input, kernel, stride=1)
print("output1")
print(output1)
output2 = F.conv2d(input, kernel, stride=2)
print("output2")
print(output2)
# stride: step size of the sliding window; padding: zero-fill around the border
output3 = F.conv2d(input, kernel, stride=1, padding=1)
print("output3")
print(output3)
为什么要将二维矩阵reshape到四个维度?
conv2d的weight参数要求四个维度:(out_channels, in_channels, 卷积核高, 卷积核宽)
padding代表什么?指在输入的四周填充零;padding=1时,矩阵的行和列分别变为H+2和W+2
神经网络--卷积层
import torch
from torch.nn import Conv2d
from torch.utils.data import DataLoader
import torchvision
from torch import nn
from torch.utils.tensorboard import SummaryWriter
class MyModule(nn.Module):
    """Single conv layer: 3 input channels -> 6 output channels, 3x3 kernel.

    With stride 1 and no padding, a 32x32 input shrinks to 30x30.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        return self.conv1(x)
test_data = torchvision.datasets.CIFAR10(root="../dataset", train=False, transform=torchvision.transforms.ToTensor())
dataset = DataLoader(dataset=test_data, batch_size=64)
mymodule = MyModule()
writer = SummaryWriter("../logs")
step = 0
for data in dataset:
    imgs, targets = data
    output = mymodule(imgs)
    print("原图")
    print(imgs.shape)
    print("处理后")
    print(output.shape)
    writer.add_images("input", imgs, step)
    # TensorBoard can only display 3-channel images, so fold the 6 channels
    # back into the batch dimension; -1 lets reshape infer the batch size.
    output = torch.reshape(output, shape=(-1, 3, 30, 30))
    writer.add_images("output", output, step)
    step += 1
writer.close()
神经网络--最大池化的使用
最大池化的作用:
可以显著的减少数据的量,加快训练的速度,类比于视频的清晰度,从1080p->720p
import torchvision
from torch import nn
from torch.nn import MaxPool2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
class MyModule(nn.Module):
    """Downsample the input with a single 3x3 max pool."""

    def __init__(self):
        super().__init__()
        # ceil_mode=True keeps partial windows at the border (smaller than
        # 3x3) and still takes their max instead of discarding them.
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        return self.maxpool1(input)
mymodule = MyModule()
writer = SummaryWriter("../logs")
dataset = torchvision.datasets.CIFAR10(root="../dataset", train=False, transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset=dataset, batch_size=64)
step = 0
for imgs, targets in dataloader:
    # Log each batch before and after pooling so the blur is visible side by side.
    writer.add_images(tag="input", img_tensor=imgs, global_step=step)
    output = mymodule(imgs)
    writer.add_images(tag="output", img_tensor=output, global_step=step)
    step += 1
writer.close()
神经网络--非线性激活
import torchvision
from torch import nn
from torch.nn import ReLU, Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
class MyModule(nn.Module):
    """Apply ReLU elementwise (Sigmoid is left as a commented alternative)."""

    def __init__(self):
        super().__init__()
        self.relu1 = ReLU()
        # self.sigmoid1 = Sigmoid()

    def forward(self, input):
        return self.relu1(input)
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset=dataset, batch_size=64)
writer = SummaryWriter("../logs")
mymodule = MyModule()
step = 0
for imgs, targets in dataloader:
    # Log each batch before and after the activation for comparison.
    writer.add_images(tag="input", img_tensor=imgs, global_step=step)
    output = mymodule(imgs)
    writer.add_images(tag="output", img_tensor=output, global_step=step)
    step += 1
writer.close()
sigmoid
其他层
可以使用官方搭建好的一些框架
Sequential的使用和搭建示例
import torch
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear
from torch.utils.tensorboard import SummaryWriter
class MyModule(nn.Module):
    """CIFAR10-style classifier built with nn.Sequential.

    32x32 input -> three (conv pad=2, keeps size; pool halves size) stages
    -> 64 x 4 x 4 = 1024 features -> two linear layers -> 10 class scores.
    """

    def __init__(self):
        super().__init__()
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10),
        )

    def forward(self, input):
        # Sequential runs the layers in order with a single call
        return self.model1(input)
# Visualize the model structure as a graph in TensorBoard.
writer = SummaryWriter("../logs")
mymodule = MyModule()
print(mymodule)
# Dummy batch of 64 CIFAR10-sized all-ones images used to trace the graph
input = torch.ones((64, 3, 32, 32))
output = mymodule(input)
print(output.shape)
writer.add_graph(mymodule, input)
writer.close()
损失函数与反向传播
import torch
from torch import nn
# Compare L1 and MSE losses on a tiny example: predictions [1, 2, 3]
# against targets [1, 2, 5], reshaped to the (N, C, H, W) layout.
# reduction: "sum" adds the elementwise errors; "mean" divides that sum by the count.
inputs = torch.tensor([1, 2, 3], dtype=torch.float32).reshape(1, 1, 1, 3)
targets = torch.tensor([1, 2, 5], dtype=torch.float32).reshape(1, 1, 1, 3)
loss_l1 = nn.L1Loss(reduction="mean")  # alternative: reduction="sum"
result_l1 = loss_l1(inputs, targets)
loss_mse = nn.MSELoss(reduction="mean")  # alternative: reduction="sum"
result_mse = loss_mse(inputs, targets)
print("loss of L1Loss")
print(result_l1)
print("loss of MSELoss")
print(result_mse)
网络模型的保存与读取
保存
import torch
import torchvision
vgg16 = torchvision.models.vgg16()
# Method 1: save the whole model (architecture + parameters)
torch.save(obj=vgg16, f="vgg16_method1.pth")
# Method 2: save only the parameters as a state dict
# (officially recommended: smaller files)
torch.save(obj=vgg16.state_dict(), f="vgg16_method2.pth")
读取
import torch
import torchvision
# 保存方式1加载模型
# Load a model saved with method 1 (the whole pickled model object)
# NOTE(review): newer PyTorch versions default torch.load to weights_only=True,
# which rejects full pickled models — confirm against the installed version.
model1 = torch.load("vgg16_method1.pth")
print(model1)
# Load a model saved with method 2: rebuild the architecture first,
# then fill in the saved parameters
vgg16 = torchvision.models.vgg16()
vgg16.load_state_dict(torch.load("vgg16_method2.pth"))
print(vgg16)
补充知识
python中__call__函数的作用
class Person:
    """Demonstrate __call__: an instance can be invoked like a function."""

    def __call__(self, name):
        # person("x") routes here automatically
        print("__call__" + "hello" + name)

    def hello(self, name):
        # Ordinary method: must be invoked as person.hello("x")
        print("hello" + name)


person = Person()
person("zhangsan")    # triggers __call__
person.hello("lisi")  # ordinary method call
与普通函数的区别在于,__call__相当于是内置函数,可以直接"对象名(参数)"的形式调用,而普通的函数则要"对象名.函数名(参数)"的形式调用