1.torchvision中的数据集使用
▲pytorch官网
官网链接:https://pytorch.org/
进入torchvision
在Package Reference中
torchvision.datasets :torchvision中给的一些数据集,例如COCO等。
继续往下滑
▲torchvision.datasets
以cifar-10为例
import torchvision

# root: directory the dataset is stored in; train=True selects the training
# split; download=True fetches the archive if it is not already present.
train_set = torchvision.datasets.CIFAR10(root="./datasets", train=True, download=True)
test_set = torchvision.datasets.CIFAR10(root="./datasets", train=False, download=True)
运行后下载:
下载成功
若速度太慢,可将图中的蓝色链接复制到迅雷中下载
下载后,可以将cifar-10-python.tar.gz这个文件复制到datasets文件夹里,再run
另外,按住ctrl点击CIFAR10进入文档,文档里有url
import torchvision

# root: dataset directory; train selects the split; download fetches if missing.
train_set = torchvision.datasets.CIFAR10(root="./datasets", train=True, download=True)
test_set = torchvision.datasets.CIFAR10(root="./datasets", train=False, download=True)

# Each sample is a (PIL image, target) pair; the target indexes into classes.
print(test_set[0])  # (<PIL.Image.Image image mode=RGB size=32x32 at 0x2E8E682A1D0>, 3)
print(test_set.classes)  # ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
img, target = test_set[0]
print(img)  # <PIL.Image.Image image mode=RGB size=32x32 at 0x186D16EA1D0>
print(target)  # 3, i.e. classes[3] == 'cat'
print(test_set.classes[target])  # cat
img.show()
加上transforms
import torchvision
from tensorboardX import SummaryWriter
from torchvision import transforms

# Convert every PIL image to a tensor as the dataset yields it.
trans_dataset = transforms.Compose([
    transforms.ToTensor()
])
# root: dataset directory; train selects the split; download fetches if missing.
train_set = torchvision.datasets.CIFAR10(root="./datasets", train=True, transform=trans_dataset, download=True)
test_set = torchvision.datasets.CIFAR10(root="./datasets", train=False, transform=trans_dataset, download=True)
print(test_set[0])  # now a (tensor, target) pair

writer = SummaryWriter("p14")
for i in range(10):
    # Unpack first — add_image wants the image tensor, not the (img, target) tuple.
    img, target = test_set[i]
    writer.add_image("cifar10-0", img, i)
writer.close()
2.DataLoader
取数据
▲参数:
▲参数翻译:
(从博主那里获取,仅做学习使用)
▲batch_size=4
import torchvision
from torch.utils.data import DataLoader

# NOTE(review): named test_data but loads the training split (train=True);
# the printed target 6 matches the train set — confirm which split is intended.
test_data = torchvision.datasets.CIFAR10("./datasets", train=True, transform=torchvision.transforms.ToTensor())
# batch_size=4: each iteration of the loader packs four samples together.
test_loader = DataLoader(dataset=test_data, batch_size=4, shuffle=True, num_workers=0, drop_last=False)

# First image and target of the dataset, before batching.
img, target = test_data[0]
print(img.shape)  # torch.Size([3, 32, 32])
print(target)  # 6

for data in test_loader:
    imgs, targets = data
    print(imgs.shape)  # torch.Size([4, 3, 32, 32])
    print(targets)  # tensor([7, 2, 5, 0]) — random because shuffle=True
▲ drop_last=True或者False的区别
batch_size=64 在 SummaryWriter显示
import torchvision
from tensorboardX import SummaryWriter
from torch.utils.data import DataLoader

# NOTE(review): named test_data but loads the training split (train=True).
test_data = torchvision.datasets.CIFAR10("./datasets", train=True, transform=torchvision.transforms.ToTensor())
# batch_size=64; drop_last=True discards the final batch if it is short.
test_loader = DataLoader(dataset=test_data, batch_size=64, shuffle=True, num_workers=0, drop_last=True)

# First image and target of the dataset, before batching.
img, target = test_data[0]
print(img.shape)  # torch.Size([3, 32, 32])
print(target)  # 6

writer = SummaryWriter("dataloader_logs")
step = 0
for data in test_loader:
    imgs, targets = data
    writer.add_images("dataloader_drop_last", imgs, step)  # note: add_images (plural) for a batch
    step = step + 1
writer.close()
运行后可以看到一个batch有64个图:
注意drop_last是否为True的区别:
▲shuffle=False 或者True
import torchvision
from tensorboardX import SummaryWriter
from torch.utils.data import DataLoader

# NOTE(review): named test_data but loads the training split (train=True).
test_data = torchvision.datasets.CIFAR10("./datasets", train=True, transform=torchvision.transforms.ToTensor())
# batch_size=64; shuffle=False keeps batch order identical across epochs.
test_loader = DataLoader(dataset=test_data, batch_size=64, shuffle=False, num_workers=0, drop_last=True)

# First image and target of the dataset, before batching.
img, target = test_data[0]
print(img.shape)  # torch.Size([3, 32, 32])
print(target)  # 6

writer = SummaryWriter("dataloader_logs")
for epoch in range(2):
    step = 0
    for data in test_loader:
        imgs, targets = data
        # With shuffle=False the two epochs show the same batches; with True they differ.
        writer.add_images("Epoch_false_:{}".format(epoch), imgs, step)  # note: add_images
        step = step + 1
writer.close()
shuffle=False:
shuffle=True:
3.神经网络的基本骨架——nn.Module的使用
▲官网
点击containers
点击Module
▲nn.module的写法
你写的所有module都要继承nn.Module
在写这两行的时候有两种方法:
(1)自己写,看文档、
(2)点击最上方的Code——>Generate——>Override Methods,就会自动重写该方法
最终代码要重写__init__和forward
import torch
from torch import nn


class first_module(nn.Module):
    """Minimal nn.Module example: forward simply adds one to its input."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, input):
        # The whole "network" is a single addition.
        return input + 1


first = first_module()      # instantiation runs __init__
x = torch.tensor(1.0)       # scalar input tensor
output = first(x)           # calling the module dispatches to forward
print(output)               # tensor(2.)
这里可以通过断点来查看内部是怎么运行的
4.卷积nn.conv
▲官网:
▲input weight的维度问题 reshape:
此处input的shape要求是四维,每维都有自己的含义
import torch

# 5x5 toy "image" and 3x3 kernel, both plain 2-D tensors for now.
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],
                      [0, 1, 0],
                      [2, 1, 0]])
# Only 2-D — conv2d requires 4-D (batch, channels, height, width) inputs.
print(input.shape)  # torch.Size([5, 5])
print(kernel.shape)  # torch.Size([3, 3])
可以看到,input和kernel的shape都只是两维,不符合要求
利用reshape更改
import torch

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],
                      [0, 1, 0],
                      [2, 1, 0]])
print(input.shape)  # torch.Size([5, 5])
print(kernel.shape)  # torch.Size([3, 3])

# Reshape to the 4-D (batch, channels, height, width) layout conv2d expects.
input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))
print(input.shape)  # torch.Size([1, 1, 5, 5])
print(kernel.shape)  # torch.Size([1, 1, 3, 3])
进行卷积
import torch
import torch.nn.functional as F

# 5x5 single-channel "image" and a 3x3 kernel.
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]])

# conv2d wants 4-D inputs: (batch, channels, height, width).
input = input.reshape(1, 1, 5, 5)
kernel = kernel.reshape(1, 1, 3, 3)

# Cross-correlate the kernel over the image, moving one cell at a time.
output = F.conv2d(input, kernel, stride=1)
print(output)
输出
▲如果stride = 2 即步长为2
# Same convolution with stride 2: the kernel moves two cells per step.
output = F.conv2d(input, kernel, stride=2)
则输出
▲如果padding=1
进行填充
# padding=1 surrounds the input with a one-cell zero border before convolving.
output3 = F.conv2d(input, kernel, stride=1, padding=1)
则输出
5.神经网络:卷积层
一个比较完整的神经网络输入输出
这里输入的尺寸是:torch.Size([64, 3, 32, 32]),输出是torch.Size([64, 6, 30, 30])
import torch
import torchvision
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import Conv2d
from tensorboardX import SummaryWriter

dataset = torchvision.datasets.CIFAR10("../datasets", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)


class second_module(nn.Module):
    # One 3x3 convolution: 3 input channels -> 6 output channels, no padding.
    def __init__(self) -> None:
        super().__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        x = self.conv1(x)
        return x


test_module = second_module()
print(test_module)  # second_module( (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1)) )

for data in dataloader:
    imgs, targets = data
    output = test_module(imgs)  # imgs is the batch of images as a tensor
    print(imgs.shape)  # torch.Size([64, 3, 32, 32])
    print(output.shape)  # torch.Size([64, 6, 30, 30]) — 32-3+1 = 30 per side
利用tensorboardX显示出来,改一下部分代码
writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, targets = data
    output = test_module(imgs)  # imgs is the batch of images as a tensor
    print(imgs.shape)  # torch.Size([64, 3, 32, 32])
    print(output.shape)  # torch.Size([64, 6, 30, 30])
    writer.add_images("input", imgs, step)
    # This line raises: add_images cannot display 6-channel images.
    writer.add_images("output", output, step)
    step = step + 1
报错了,因为6个channel没法显示
所以我们想把输出的[64, 6, 30, 30]调整reshape成[xx, 3, 30, 30],这里的xx要根据实际的进行调整,所以完整的reshape语句用-1代替xx,如下
# -1 lets reshape infer the batch dimension, folding the extra channels
# into additional 3-channel pseudo-images that add_images can display.
output = torch.reshape(output, (-1, 3, 30, 30))
完整代码:
import torch
import torchvision
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import Conv2d
from tensorboardX import SummaryWriter

dataset = torchvision.datasets.CIFAR10("../datasets", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)


class second_module(nn.Module):
    # One 3x3 convolution: 3 input channels -> 6 output channels, no padding.
    def __init__(self) -> None:
        super().__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        x = self.conv1(x)
        return x


test_module = second_module()
print(test_module)  # second_module( (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1)) )

writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, targets = data
    output = test_module(imgs)  # imgs is the batch of images as a tensor
    print(imgs.shape)  # torch.Size([64, 3, 32, 32])
    print(output.shape)  # torch.Size([64, 6, 30, 30])
    writer.add_images("input", imgs, step)
    # writer.add_images("output", output, step)  # would raise: 6 channels cannot be displayed
    # Fold the 6 channels into extra batch entries so add_images can render them.
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)
    step = step + 1
命令行:tensorboard --logdir=logs
注意路径:
显示:
6.神经网络:最大池化
ceil_mode具体解析:
True的时候填充,False不填充
import torch
from torch import nn
from torch.nn import MaxPool2d

# NOTE: this is the "before" version of the snippet — it intentionally fails.
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
input = torch.reshape(input, (-1, 1, 5, 5))
print(input.shape)


class maxpool_test(nn.Module):
    # 3x3 max pooling; ceil_mode=True keeps partially covered windows.
    def __init__(self) -> None:
        super().__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        output = self.maxpool1(input)
        return output


test = maxpool_test()
# Raises RuntimeError: "max_pool2d" not implemented for 'Long' —
# the integer tensor must be converted to a float dtype first.
output = test(input)
print(output)
结果报错
“max_pool2d” not implemented for ‘Long’
所以把输入的数据改成浮点型
# Re-declare the input as float32: max_pool2d is not implemented for Long tensors.
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
总的代码是
import torch
from torch import nn
from torch.nn import MaxPool2d

# Float dtype is required: max_pool2d is not implemented for Long tensors.
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
input = input.reshape(-1, 1, 5, 5)  # (batch, channels, height, width)
print(input.shape)


class maxpool_test(nn.Module):
    """3x3 max pooling; ceil_mode=True keeps partially covered windows."""

    def __init__(self) -> None:
        super().__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        return self.maxpool1(input)


test = maxpool_test()
output = test(input)
print(output)
用tensorboardX显示
import torch
import torchvision
from tensorboardX import SummaryWriter
from torch import nn
from torch.nn import MaxPool2d
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../datasets", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)


class maxpool_test(nn.Module):
    # 3x3 max pooling; ceil_mode=False drops partially covered windows.
    def __init__(self) -> None:
        super().__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=False)

    def forward(self, input):
        output = self.maxpool1(input)
        return output


test = maxpool_test()
writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, targets = data
    output = test(imgs)
    writer.add_images("input_maxpool", imgs, step)
    writer.add_images("output_maxpool", output, step)
    step = step + 1
writer.close()
7.神经网络:非线性激活
其中inplace的意义如下:
ReLU:
import torch
from torch import nn
from torch.nn import ReLU

# 2x2 input with negative entries, reshaped to (batch, channel, H, W).
input = torch.tensor([[1, -0.5],
                      [-1, 3]])
input = input.reshape(-1, 1, 2, 2)  # torch.Size([1, 1, 2, 2])


class test_relu(nn.Module):
    """Applies ReLU: negatives become zero, positives pass through unchanged."""

    def __init__(self) -> None:
        super(test_relu, self).__init__()
        self.relu1 = ReLU()

    def forward(self, input):
        return self.relu1(input)


test = test_relu()
output = test(input)
print(output)  # tensor([[[[1., 0.], [0., 3.]]]])
利用sigmoid和tensorboardX
import torch
import torchvision
from tensorboardX import SummaryWriter
from torch import nn
from torch.nn import ReLU, Sigmoid
from torch.utils.data import DataLoader


class test_relu(nn.Module):
    # Holds both activations, but forward only applies the sigmoid.
    def __init__(self) -> None:
        super(test_relu, self).__init__()
        self.relu1 = ReLU()
        self.sigmoid1 = Sigmoid()

    def forward(self, input):
        output = self.sigmoid1(input)
        return output


dataset = torchvision.datasets.CIFAR10("../datasets", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)

test = test_relu()
writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, targets = data
    output = test(imgs)
    writer.add_images("input_sigmoid1", imgs, step)
    writer.add_images("output_sigmoid1", output, step)
    step = step + 1
writer.close()
8.神经网络:线性层
以vgg16的这个层为例
我们的目标就是把5×5的图展成一个横的25×1的图,再继续
同时可以注意一下展平函数
import torch
import torchvision
from tensorboardX import SummaryWriter
from torch import nn
from torch.nn import ReLU, Sigmoid, Linear
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../datasets", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
# drop_last=True is required here: Linear(196608, 10) assumes a full batch of
# 64 images (64*3*32*32 = 196608); the final short batch (10000 % 64 = 16
# images flattens to only 49152 features) would crash the matmul.
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)


class test_linear(nn.Module):
    """One fully-connected layer mapping a flattened 64-image batch to 10 values."""

    def __init__(self) -> None:
        super(test_linear, self).__init__()
        self.linear1 = Linear(196608, 10)

    def forward(self, input):
        output = self.linear1(input)
        return output


test = test_linear()
# writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, targets = data  # torch.Size([64, 3, 32, 32]) original batch
    output = torch.reshape(imgs, (1, 1, 1, -1))  # torch.Size([1, 1, 1, 196608]) manual flatten
    # torch.flatten collapses everything to 1-D (replaces the reshape above).
    output = torch.flatten(imgs)  # torch.Size([196608])
    output = test(output)  # torch.Size([10]) — 1-D in, 1-D out
9.官方提供的一些模型
torchvision中的model:
有待探索