继上篇 dataloader
import torchvision
from torch.utils.data import DataLoader
# 准备的测试数据集
from torch.utils.tensorboard import SummaryWriter
# Load the CIFAR-10 test split from ./dataset, converting every image to a tensor.
test_data = torchvision.datasets.CIFAR10(
    "./dataset", train=False, transform=torchvision.transforms.ToTensor()
)

# The DataLoader is what actually batches the dataset for us.
#   shuffle=False   : every epoch visits the samples in the same order
#   shuffle=True    : the order is reshuffled, so epoch 1 differs from epoch 0
#   drop_last=True  : discard the final batch when it is smaller than batch_size
#   drop_last=False : keep that final, smaller batch
test_loader = DataLoader(
    dataset=test_data, batch_size=64, shuffle=True, num_workers=0, drop_last=True
)

# First image of the test set and its target (class index).
img, target = test_data[0]
print(img.shape)
print(target)

writer = SummaryWriter("dataloader")
for epoch in range(2):
    # The step counter restarts at 0 for every epoch; each epoch logs under its own tag.
    for step, data in enumerate(test_loader):
        imgs, targets = data
        # imgs is a batch shaped [batch, 3, 32, 32]: three colour channels, 32x32 pixels.
        # (The original note recorded torch.Size([4, 3, 32, 32]) and targets such as
        # tensor([0, 6, 1, 2]) from a run with batch_size=4.)
        # With shuffle=True the samples in a batch are not taken in dataset order:
        # the DataLoader draws them through its sampler, a RandomSampler.
        writer.add_images("epoch:{}".format(epoch), imgs, step)
writer.close()
神经网络的基本骨架——nn.Module的使用
关于神经网络的官方文档位于pytorch.org 中的Docs里面的API中的torch.nn中。
import torch.nn as nn
import torch.nn.functional as F
class Model(nn.Module):
    """Two stacked 5x5 convolutions, each followed by a ReLU.

    Every custom network subclasses ``nn.Module``, creates its layers in
    ``__init__`` and describes the computation in ``forward``.
    """

    def __init__(self):
        # The parent constructor must run before sub-modules are assigned.
        super().__init__()
        # 1 input channel -> 20 output channels, 5x5 kernel.
        self.conv1 = nn.Conv2d(1, 20, 5)
        # 20 input channels -> 20 output channels, 5x5 kernel.
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        """Return ``relu(conv2(relu(conv1(x))))``."""
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))
forward() 定义了每次调用时所执行的计算,所有子类都应当重写该方法。
import torch
from torch import nn
class Tudui(nn.Module):
    """Minimal ``nn.Module`` whose forward pass adds 1 to its input."""

    def __init__(self):
        super().__init__()

    def forward(self, input):
        """Return ``input + 1``."""
        output = input + 1
        return output


tudui = Tudui()
x = torch.tensor(1.0)
# Calling the module object runs forward() on the argument.
output = tudui(x)
print(output)
卷积操作
Convolution Layers卷积层
- 输入
- 卷积核
- bias
- 步长(stride)
- 填充(padding)
import torch
import torch.nn.functional as F
# 5x5 single-channel input and a 3x3 kernel, both written as integer grids.
# F.conv2d wants 4-D tensors, so each grid is reshaped right away:
#   input  -> (minibatch, in_channels, height, width)
#   kernel -> (out_channels, in_channels, kernel_height, kernel_width)
input = torch.tensor(
    [
        [1, 2, 0, 3, 1],
        [0, 1, 2, 3, 1],
        [1, 2, 1, 0, 0],
        [5, 2, 3, 1, 1],
        [2, 1, 0, 1, 1],
    ]
).reshape(1, 1, 5, 5)
kernel = torch.tensor(
    [
        [1, 2, 1],
        [0, 1, 0],
        [2, 1, 0],
    ]
).reshape(1, 1, 3, 3)

print(input.shape)
print(kernel.shape)

# No padding: the 3x3 kernel slides one step at a time over the 5x5 input.
output = F.conv2d(input, weight=kernel, stride=1)
print(output)

# Same convolution with three rows/columns of zeros padded on every side.
output1 = F.conv2d(input, weight=kernel, stride=1, padding=3)
print(output1)
卷积层的使用
输入图像经过一个卷积核卷积一次,就得到一个输出通道,即 out_channels=1;如果 out_channels=2,卷积层就会生成两个卷积核,分别对输入做卷积,从而得到两个输出通道。这两个卷积核的尺寸相同,但其中的数值(权重)不一定相同。
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# CIFAR-10 test split stored under ./data (downloaded if missing), images as tensors.
dataset = torchvision.datasets.CIFAR10(
    root="./data",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
# Serve the dataset in batches of 64.
dataloader = DataLoader(dataset=dataset, batch_size=64)
class Tudui(nn.Module):
    """Single convolution layer: 3 input channels -> 6 output channels, 3x3 kernel."""

    def __init__(self):
        super().__init__()
        # stride=1 and padding=0 with a 3x3 kernel shrink height and width by 2.
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Apply ``conv1`` to ``x`` and return the result."""
        x = self.conv1(x)
        return x
tudui = Tudui()
print(tudui)

writer = SummaryWriter("logs")
# Run every batch through the convolution and log input/output images per step.
for step, data in enumerate(dataloader):
    imgs, targets = data
    output = tudui(imgs)
    print(imgs.shape)
    print(output.shape)
    # imgs: torch.Size([64, 3, 32, 32]) for a full batch
    writer.add_images("input", imgs, step)
    # output: torch.Size([64, 6, 30, 30]) -> [xxx, 3, 30, 30].
    # The 6 channels are folded into 3-channel images so they can be logged as
    # pictures; -1 lets reshape work out the batch dimension from the rest.
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)
writer.close()
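如果卷积前后尺寸保持不变,说明使用了 padding 对输入进行了填充。如果没有给出 padding 或 stride,可以根据输入、输出尺寸用下面的公式反推(形状中的 N 表示 batch size):$H_{out} = \left\lfloor \dfrac{H_{in} + 2 \times padding - dilation \times (kernel\_size - 1) - 1}{stride} + 1 \right\rfloor$,$W_{out}$ 同理。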
神经网络——最大池化的使用
Pooling layers
最大池化(MaxPool)有时也被称为下采样;与之相对,MaxUnpool 称为上采样。
MAXPOOL2D
dilation:池化窗口内相邻元素之间的间隔(空洞),默认为 1。
ceil_mode:为 True 时输出尺寸向上取整(保留不足一个池化核大小的边缘窗口),为 False 时向下取整(舍弃这些窗口)。
最大池化取池化核(3×3)所覆盖的九个数字中最大的一个。池化的 stride 默认等于 kernel_size,这里即 stride=3;当窗口移动到边缘、只覆盖到六个数时,若 ceil_mode=True 就保留这六个数并取其中的最大值,若 ceil_mode=False 则舍弃该窗口。默认情况下,ceil_mode 为 False。
import torch
from torch import nn
from torch.nn import MaxPool2d
# 5x5 grid of integer literals, so the tensor gets an integer dtype.
# It is reshaped straight away to 4-D (batch, channel, height, width);
# -1 lets reshape infer the batch size (1 here).
input = torch.tensor(
    [
        [1, 2, 0, 3, 1],
        [0, 1, 2, 3, 1],
        [1, 2, 1, 0, 0],
        [5, 2, 3, 1, 1],
        [2, 1, 0, 1, 1],
    ]
).reshape(-1, 1, 5, 5)
print(input.shape)
class Tudui(nn.Module):
    """Single 3x3 max-pooling layer with ``ceil_mode=True``."""

    def __init__(self):
        super().__init__()
        # No stride is given, so it defaults to kernel_size (3).
        # ceil_mode=True keeps the partial windows at the right/bottom edges.
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        """Return ``input`` after max pooling."""
        output = self.maxpool1(input)
        return output
# Instantiate the pooling module and apply it to the tensor built above.
tudui = Tudui()
# NOTE(review): per the notes that follow, this call raises
# RuntimeError: "max_pool2d" not implemented for 'Long' while `input` is an
# integer tensor; it needs a floating-point `input` to succeed.
output = tudui(input)
print(output)
报错:RuntimeError: "max_pool2d" not implemented for 'Long'
修改:
# Same 5x5 grid as before, but created as float32 so max pooling accepts it.
input = torch.tensor(
    [
        [1, 2, 0, 3, 1],
        [0, 1, 2, 3, 1],
        [1, 2, 1, 0, 0],
        [5, 2, 3, 1, 1],
        [2, 1, 0, 1, 1],
    ],
    dtype=torch.float32,
)
加上dtype,input就变成了浮点型数据类型。
为什么要进行最大池化呢?
最大池化的目的就是保留输入的特征同时减少数据量,加快训练。举例理解:输入为1080P的图像,经过池化之后,就变成了720P的图像,能传达视频内容的同时,文件尺寸会大大缩小。
神经网络——非线性激活
Relu
Sigmoid
import torch
import torchvision
from torch import nn
from torch.nn import ReLU, Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# Tiny 2x2 example containing negative values, reshaped to 4-D
# (batch, channel, height, width); -1 lets reshape infer the batch size.
input = torch.tensor([[1, -0.5], [-1, 3]]).reshape(-1, 1, 2, 2)
print(input.shape)
# CIFAR-10 test split stored under ./data (downloaded if missing), images as tensors.
dataset = torchvision.datasets.CIFAR10(
    root="./data",
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
# Serve the dataset in batches of 64.
dataloader = DataLoader(dataset=dataset, batch_size=64)
class Tudui(nn.Module):
    """Module holding a ReLU and a Sigmoid; ``forward`` applies the Sigmoid.

    ``relu1`` is created but not used by ``forward``; swap it in there to
    try ReLU instead.
    """

    def __init__(self):
        super().__init__()
        self.relu1 = ReLU()
        self.sigmoid1 = Sigmoid()

    def forward(self, input):
        """Return ``sigmoid1(input)``."""
        output = self.sigmoid1(input)
        return output
writer = SummaryWriter("./logs_relu")
tudui = Tudui()
# To try the module on the small hand-written tensor instead:
# output = tudui(input)
# print(output)

# Log every batch before and after the activation, one step per batch.
for step, data in enumerate(dataloader):
    imgs, targets = data
    writer.add_images("input", imgs, step)
    output = tudui(imgs)
    writer.add_images("output", output, step)
writer.close()
非线性变换的主要目的是给网络中引入一些非线性特征,因为非线性越多,才能训练出符合各种曲线或者符合各种特征的模型,如果大家都是直愣愣的,那么模型的泛化能力就不够好。
线性层
使用flatten
import torch
import torchvision
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# CIFAR-10 test split stored under ./data (downloaded if missing), images as tensors.
dataset = torchvision.datasets.CIFAR10(
    root="./data",
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
# Batches of 64; drop_last=True discards a final batch that is not full.
dataloader = DataLoader(dataset=dataset, batch_size=64, drop_last=True)
class Tudui(nn.Module):
    """Single fully connected layer mapping 196608 input features to 10 outputs.

    196608 = 64 * 3 * 32 * 32, the number of values in one full batch of
    64 images of shape 3x32x32 once flattened.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = Linear(196608, 10)

    def forward(self, input):
        """Return ``linear1(input)``; the last dimension of ``input`` must be 196608."""
        output = self.linear1(input)
        return output
tudui = Tudui()
writer = SummaryWriter("logs_linear")
for step, data in enumerate(dataloader):
    imgs, targets = data
    print(imgs.shape)
    # Alternative with an explicit target shape:
    # imgs2 = torch.reshape(imgs, (1, 1, 1, -1))
    imgs2 = torch.flatten(imgs)
    print(imgs2.shape)
    # writer.add_images("input", imgs2, global_step=step)
    output = tudui(imgs2)
    print(output.shape)
    # writer.add_images("output", output, global_step=step)
writer.close()
# reshape is the more general tool: it can produce any compatible target shape.
# flatten simply collapses the tensor into a single row.
# A linear layer is a matrix multiplication. A final, smaller batch would flatten
# to fewer than 196608 values and violate the shape rule, which is why the
# DataLoader has to be created with drop_last=True.