Pytorch学习笔记
一、必备的数据集操作技巧
1.数据集操作
下列代码对原始数据集进行了处理,方便了对数据集的访问
import torch
from torch.utils.data import Dataset
from PIL import Image
import os
class MyData(Dataset):
    """Dataset that treats every file in ``root_dir/label_dir`` as one sample.

    The folder name itself (``label_dir``) doubles as the label for every
    image the folder contains.
    """

    def __init__(self, root_dir, label_dir):
        self.root_dir = root_dir
        self.label_dir = label_dir
        # Full path of the folder holding this label's images.
        self.path = os.path.join(self.root_dir, self.label_dir)
        # File names of every sample found in that folder.
        self.img_path = os.listdir(self.path)

    def __getitem__(self, idx):
        # Resolve the file name at position ``idx`` to a full path and load it.
        name = self.img_path[idx]
        img = Image.open(os.path.join(self.root_dir, self.label_dir, name))
        # All samples in the folder share the folder name as their label.
        return img, self.label_dir

    def __len__(self):
        # Number of files discovered in the label folder.
        return len(self.img_path)
# Build one dataset per label folder under dataset/train.
root_dir = "dataset/train"
positive_label_dir = "positive"
negative_label_dir = "negative"
positive_dataset = MyData(root_dir, positive_label_dir)
negative_dataset = MyData(root_dir, negative_label_dir)
train_dataset = positive_dataset + negative_dataset  # '+' concatenates the two datasets into one
2.利用txt文件保存label
上述情况,label是体现在文件夹的名称内,而使用最多的情况通常是这样的
positive_label文件夹内包含了与图片同名的txt文件,打开txt文件就是该图片的label,实现给图片创建对应txt的代码如下:
import os

# For every image in root_dir/target_dir, create "<image name>.txt" in
# root_dir/out_dir whose content is the label (here, the folder name).
root_dir = "dataset/train"
target_dir = "positive"
img_path = os.listdir(os.path.join(root_dir, target_dir))
label = target_dir
out_dir = "positive_label"
# BUG FIX: the output folder was never created, so open(..., "w") below
# raised FileNotFoundError on a fresh tree.
os.makedirs(os.path.join(root_dir, out_dir), exist_ok=True)
for i in img_path:
    # Strip the ".png" suffix to recover the bare image name.
    file_name = i.split(".png")[0]
    with open(os.path.join(root_dir, out_dir, "{}.txt".format(file_name)), "w") as f:
        f.write(label)
二、Pytorch中tensorboard的使用
tensorboard我理解为一个看板,对结果进行可视化展示
1.add_scalar函数的用法
from torch.utils.tensorboard import SummaryWriter

# Log the scalar series y = 0.5x.  add_scalar(tag, y, x): the second argument
# is the value plotted on the y axis, the third the global step (x axis).
# Using a distinct tag per curve keeps separate runs from overlapping.
writer = SummaryWriter("logs")  # event files are written into the "logs" folder
for step in range(100):
    writer.add_scalar("y=0.5x", step, 2 * step)
writer.close()

# To open the event files, run in a terminal:
#   tensorboard --logdir=logs
# To use a different port:
#   tensorboard --logdir=logs --port=6007
2.add_image函数的用法
from torch.utils.tensorboard import SummaryWriter
import numpy as np
from PIL import Image
writer = SummaryWriter("logs")  # event files (images etc.) go into the "logs" folder
image_path = "dataset/train/positive/1 (6).png"
img_PIL = Image.open(image_path)
img_array = np.array(img_PIL)  # add_image cannot consume PIL images, so convert to a numpy array first
# print(img_array.shape) -> arrays converted from PIL are usually (H, W, C), hence dataformats below
writer.add_image("test", img_array, 2, dataformats='HWC')  # 2 is the global_step for this image
# Advancing global_step lets TensorBoard show how the image changes step by step.
# for i in range(100):
#     writer.add_scalar("y=0.5x", i, 2*i)  # y value first, then x; distinct tags keep plots apart
writer.close()
# To open the event files, run in a terminal: tensorboard --logdir=logs
# To change the port: tensorboard --logdir=logs --port=6007
三、transform的用法
transform可以理解为一个工具箱,里面有很多工具(class类)
1.基础知识
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
import cv2
# transforms behaves like a toolbox: each class inside is one tool.
# PyCharm's "structure" panel shows the module layout; the first few classes are the most used.
#### Usage from Python
### transforms.ToTensor answers two questions:
### 1. how a transform is used
### 2. what the tensor type offers compared with other image representations
img_path = "dataset/train/positive/1 (1).png"
img = Image.open(img_path)  # loaded as a PIL image; converted to a tensor below
# print(img) <PIL.PngImagePlugin.PngImageFile image mode=RGBA size=96x160 at 0x17C1459D240>
tensor_trans = transforms.ToTensor()  # instantiate the tool
tensor_img = tensor_trans(img)  # convert the PIL image into a tensor
### 2. tensors carry the extra attributes convolutional networks need, unlike other image types
cv_img = cv2.imread(img_path)  # OpenCV loads images as numpy arrays
writer = SummaryWriter("logs")
writer.add_image("Tensor_img", tensor_img, 1)
writer.close()
下面介绍python中类的概念,关于__call__函数的用法
class Person:
    """Toy class contrasting ``__call__`` with an ordinary method."""

    def __call__(self, name):
        # Runs when the instance itself is called, e.g. person("zyj").
        print("__call__"+"hello"+" "+name)

    def hello(self, name):
        # A regular method: must be invoked explicitly as person.hello(...).
        print("hello"+" "+name)
person = Person()
person("zyj")  # calling the instance directly dispatches to __call__
person.hello("zy")  # an ordinary method has to be named after the dot
2.常见的transforms
下面介绍了transforms中常见的类的用法
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from PIL import Image

# A tour of the most commonly used transforms classes.
writer = SummaryWriter("logs")
img = Image.open("dataset/train/positive/1 (14).jpg")

# ToTensor: PIL image -> tensor.
trans_totensor = transforms.ToTensor()
img_tensor = trans_totensor(img)
writer.add_image("To_Tensor", img_tensor)

# Normalize: output[channel] = (input[channel] - mean[channel]) / std[channel].
# The first list holds the per-channel means, the second the standard
# deviations (both user supplied).  img_tensor is in [0, 1]; with mean 0.5 and
# std 0.5 the result would land in [-1, 1] (the values here are arbitrary).
trans_norm = transforms.Normalize([6, 5, 0.5], [5, 0.5, 5])
img_norm = trans_norm(img_tensor)
writer.add_image("Normalize", img_norm, 1)

# Resize: an (h, w) pair resizes to exactly that size; a single number would
# match the image's shorter edge to it instead.
print(img.size)
trans_resize = transforms.Resize((512, 512))
img_resize = trans_resize(img_tensor)
# print(img_resize)
writer.add_image("Resize", img_resize)

# RandomCrop: take a random 512x512 crop; Compose chains transforms in order.
trans_random = transforms.RandomCrop(512)
trans_compose2 = transforms.Compose([trans_random, trans_totensor])
for i in range(10):
    img_crop = trans_compose2(img)
    writer.add_image("Randcrop", img_crop, i)
writer.close()
四、Dataloader的用法
#dataloader的作用是到dataset中取数据
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# A DataLoader pulls samples out of a Dataset in batches.
test_data = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor())
# batch_size: how many samples are drawn per batch.
# shuffle=True: the draw order differs from one pass to the next.
# num_workers: number of loader processes (0 = load in the main process).
# drop_last: whether an incomplete final batch is discarded.
test_loader = DataLoader(dataset=test_data, batch_size=4, shuffle=True, num_workers=0, drop_last=False)

# Peek at the first test image and its target.
img, target = test_data[0]
print(img.shape)
print(target)

writer = SummaryWriter("dataloader")
step = 0
for data in test_loader:
    imgs, targets = data
    # print(imgs.shape) --> torch.Size([4, 3, 32, 32])  (4 images, 3 channels)
    # print(targets)    --> tensor([0, 8, 6, 5])
    writer.add_images("test_data", imgs, step)
    step = step + 1
writer.close()
五、卷积层操作(一)
1d,2d,3d表示维度,图片是二维的
input需要有四个信息参数,(minibatch,in_channels,iH,iW)
因此需要利用torch.reshape更改尺寸
import torch
import torch.nn.functional as F

# F.conv2d expects a 4-D input (minibatch, in_channels, H, W), hence the
# reshape of both the 5x5 input and the 3x3 kernel below.
# (conv1d/conv2d/conv3d differ only in dimensionality; images are 2-D.)
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 1],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]])
input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))

# Plain convolution, stride 1 -> 3x3 result (4-D because of the reshapes):
# tensor([[[[10, 12, 12],
#           [18, 16, 16],
#           [13,  9,  4]]]])
output = F.conv2d(input, kernel, stride=1)
print(output)

# padding=1 pads the border with zeros (the default fill), keeping 5x5:
# tensor([[[[ 1,  3,  4, 10,  8],
#           [ 5, 10, 12, 12,  7],
#           [ 7, 18, 16, 16,  9],
#           [11, 13,  9,  4,  6],
#           [14, 13,  9,  7,  4]]]])
output2 = F.conv2d(input, kernel, stride=1, padding=1)
print(output2)
六、卷积层操作(二)
上下两个区别是什么?
kernel_size的参数不需要给出,会自动调整
in_channel很好理解,就是图片的通道数
out_channel需要自己设置,当和in_channel的通道数不一样时,卷积核的个数会不一样
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# CIFAR10 test split as tensors (download=False assumes the data is already on disk).
dataset = torchvision.datasets.CIFAR10("./dataset",
train=False, transform=torchvision.transforms.ToTensor(),
download=False)
dataloader = DataLoader(dataset, batch_size=64)
class ZYJ(nn.Module):
    """Minimal conv net: one 3x3 convolution mapping 3 channels to 6."""

    def __init__(self):
        super().__init__()
        # in_channels is fixed by the image (RGB = 3); out_channels is the
        # number of kernels.  Only kernel_size is given: the kernel weights
        # themselves are learned, not supplied.
        self.conv1 = Conv2d(in_channels=3,
                            out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        # With kernel 3 and no padding, each spatial dim shrinks by 2.
        return self.conv1(x)
zyj = ZYJ()
print(zyj)
# ZYJ(
#   (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
# )

writer = SummaryWriter("juanji")
step = 0
for data in dataloader:
    imgs, targets = data
    output = zyj(imgs)
    # imgs.shape   -> torch.Size([64, 3, 32, 32])
    # output.shape -> torch.Size([64, 6, 30, 30])
    writer.add_images("input", imgs, step)
    # NOTE: add_image (singular) raises
    # "AssertionError: size of input tensor and input format are different."
    # on a batch; add_images is the batched variant.
    # A 6-channel tensor cannot be displayed directly, so (as a quick hack)
    # reshape it into 3-channel images; -1 lets torch infer the batch size.
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)
    step = step + 1
writer.close()
上图为vgg16架构
1->2经过了一次卷积+函数激活操作,前后尺寸不变,说明对padding进行了调试,input_channel = 3, output_channel = 64
具体调试(计算方法)如下图
七、最大池化的使用
1.理解池化
参数:
(stride的默认值是核的尺寸)
dilation 空洞卷积。如图
ceil_mode可以选择Floor 和 Ceiling模式,默认为False(Floor)模式
计算公式
import torch
from torch import nn
from torch.nn import MaxPool2d

# A 5x5 float input reshaped to (N, C, H, W) for MaxPool2d.
# The dtype must be floating point, otherwise pooling raises:
# "max_pool2d" not implemented for 'Long'
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
input = torch.reshape(input, (-1, 1, 5, 5))
class ZYJ(nn.Module):
    """Single MaxPool2d layer (kernel 3, ceil_mode keeps partial windows)."""

    def __init__(self):
        super().__init__()
        # stride defaults to kernel_size; ceil_mode=True rounds the output
        # size up, so edge windows smaller than 3x3 still produce a value.
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        return self.maxpool1(input)
zyj = ZYJ()
output = zyj(input)  # (1,1,5,5) -> (1,1,2,2): kernel 3 with ceil_mode=True
print(output)
八、非线性激活
ReLU函数
import torch
from torch import nn
from torch.nn import ReLU
# Tiny 2x2 example for the activation demos; reshaped to (N, C, H, W).
input = torch.tensor([[1, -0.5],
                      [-1, 3]])
output = torch.reshape(input, (-1, 1, 2, 2))
print(output.shape)
class ZYJ(nn.Module):
    """Wraps nn.ReLU: negatives become 0, everything else passes through."""

    def __init__(self):
        super().__init__()
        # inplace defaults to False, i.e. the input tensor is left untouched.
        self.relu1 = ReLU()

    def forward(self, input):
        return self.relu1(input)
zyj = ZYJ()
output = zyj(input)  # note: applied to the original 2x2 `input`, not the reshaped `output`
print(output)
以下为对图像处理
import torch
import torchvision
from torch import nn
from torch.nn import ReLU, Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# CIFAR10 test split as tensors (expects the data to already be on disk).
dataset = torchvision.datasets.CIFAR10('dataset', train=False, download=False,
transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=64)
class ZYJ(nn.Module):
    """Applies a Sigmoid to its input (a ReLU is constructed but unused)."""

    def __init__(self):
        super().__init__()
        self.relu1 = ReLU()  # inplace defaults to False; kept although forward never uses it
        self.sigmoid1 = Sigmoid()

    def forward(self, input):
        # Only the sigmoid participates in the forward pass.
        return self.sigmoid1(input)
zyj = ZYJ()
writer = SummaryWriter("logs_sigmoid")
step = 0
# Log each batch before and after the sigmoid so both can be compared in TensorBoard.
for data in dataloader:
    imgs, target = data
    writer.add_images("input", imgs, global_step=step)
    output = zyj(imgs)
    writer.add_images("output", output, global_step=step)
    step = step + 1
writer.close()
九、线性层及其他层的介绍
1.正则化层——BatchNorm2d
2.Recurrent Layers
用得不多
3.Transformer Layers
用得不多
4.Linear Layers(重点)
下图为线性层,对应的in_feature = d, out_feature = L ,bias = True
import torch
import torchvision
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader
# CIFAR10 test split; drop_last=True discards the final incomplete batch so
# every batch flattens to exactly 196608 features in the loop below.
dataset = torchvision.datasets.CIFAR10("dataset", train=False, transform=torchvision.transforms.ToTensor()
, download=False)
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)
class ZYJ(nn.Module):
    """One fully connected layer: 196608 inputs (64*3*32*32) down to 10."""

    def __init__(self):
        super().__init__()
        self.linear1 = Linear(196608, 10)

    def forward(self, input):
        return self.linear1(input)
zyj = ZYJ()
for data in dataloader:
    imgs, targets = data
    print(imgs.shape)
    # Flatten the whole batch into one (1, 1, 1, 196608) row.
    # torch.flatten(imgs) would yield the same values as a 1-D tensor instead.
    output = torch.reshape(imgs, (1, 1, 1, -1))
    print(output.shape)
    output = zyj(output)
    print(output.shape)
5.Dropout Layers
防止过拟合
6.Sparse Layers
自然语言处理使用得多
7.Distance Function
计算两个值的误差
8.Loss Function
…
十、搭建小实战和sequential的使用
网络结构搭建
import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.tensorboard import SummaryWriter
class ZYJ(nn.Module):
    """CIFAR10-sized CNN: three (conv 5x5 pad 2 -> maxpool 2) stages, then a
    flatten and two linear layers producing 10 class scores."""

    def __init__(self):
        super().__init__()
        self.model = Sequential(
            Conv2d(3, 32, 5, padding=2),   # padding 2 keeps the spatial size
            MaxPool2d(2),                  # halves H and W
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),              # 1024 = 64 channels * 4 * 4 for 32x32 input
            Linear(64, 10)
        )

    def forward(self, input):
        return self.model(input)
zyj = ZYJ()
input = torch.ones(64, 3, 32, 32)  # dummy batch to sanity-check the output shape
output = zyj(input)
print(output.shape)
writer = SummaryWriter("logs_seq")
writer.add_graph(zyj, input)  # renders the network graph in TensorBoard
writer.close()
十一、损失函数
import torch
from torch import nn
from torch.nn import L1Loss, MSELoss

# --- L1Loss: with reduction='sum' the absolute errors are summed (|1-4| = 3).
inputs = torch.tensor([1, 2, 3], dtype=torch.float32)
targets = torch.tensor([4, 2, 3], dtype=torch.float32)
inputs = torch.reshape(inputs, (1, 1, 1, 3))
targets = torch.reshape(targets, (1, 1, 1, 3))
loss = L1Loss(reduction='sum')
result = loss(inputs, targets)
print(result)

# --- MSELoss: mean of the squared errors (9 / 3 = 3).
loss_mse = MSELoss()
result_mse = loss_mse(inputs, targets)
print(result_mse)

# --- CrossEntropyLoss: x holds raw scores for 3 classes, y the true class index.
x = torch.tensor([0.1, 0.2, 0.3])
y = torch.tensor([1])
x = torch.reshape(x, (1, 3))
loss_cross = nn.CrossEntropyLoss()
result_cross = loss_cross(x, y)
print(result_cross)
下面搭建了一个神经网络
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential, ReLU, Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# CIFAR10 test split; batch_size=1 so the loss below is computed per image.
dataset = torchvision.datasets.CIFAR10(root="dataset", train=False, transform=torchvision.transforms.ToTensor()
,download=False)
dataloader = DataLoader(dataset, batch_size=1)
class ZYJ(nn.Module):
    """CIFAR10 CNN with activations mixed in: conv/pool stages plus a ReLU
    and a Sigmoid, then a flatten and two linear layers down to 10 scores."""

    def __init__(self):
        super().__init__()
        self.model = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            ReLU(),
            Conv2d(32, 32, 5, padding=2),
            Sigmoid(),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, input):
        return self.model(input)
zyj = ZYJ()
loss = nn.CrossEntropyLoss()
for data in dataloader:
    imgs, targets = data
    output = zyj(imgs)
    result_loss = loss(output, targets)
    # backward() fills in the gradients; an optimizer would then use them
    # to update the parameters.
    result_loss.backward()
十二、优化器
# Optimizer demo: plain SGD over the parameters of the network defined above.
loss = nn.CrossEntropyLoss()
optim = torch.optim.SGD(zyj.parameters(), lr=0.01)
for epoch in range(50):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        output = zyj(imgs)
        result_loss = loss(output, targets)
        optim.zero_grad()  # clear the gradients left over from the previous step
        result_loss.backward()  # backprop to obtain fresh gradients
        optim.step()  # update the parameters using those gradients
        # BUG FIX: summing the raw loss tensor kept every batch's autograd
        # graph alive for the whole epoch; .item() extracts a plain float.
        running_loss = running_loss + result_loss.item()
    print(running_loss)
十三、现有模型的使用和修改
pretrained为true时,就是带参数的
改模型两个思路:1.将最后一个线性层的out_feature改成10
2.添加一个线性层
十四、模型的保存
方式一保存的是模型的结构+模型的参数
方式二保存的是模型的参数
载入方法 ,推荐第二种,第一种更简单
十五、完整的模型训练套路
import torch
from torch.utils.tensorboard import SummaryWriter
from model_self import *
import torchvision
from torch.nn import Conv2d
from torch.optim import SGD
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear

# --- datasets --------------------------------------------------------------
train_data = torchvision.datasets.CIFAR10('./cifar10', True, transform=torchvision.transforms.ToTensor(),download=False)
test_data = torchvision.datasets.CIFAR10('./cifar10', False, transform=torchvision.transforms.ToTensor(),download=False)

train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集长度:{}".format(train_data_size))  # fixed typo: 数据及 -> 数据集
print("测试数据集长度:{}".format(test_data_size))

train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# --- model / loss / optimizer -----------------------------------------------
lyy = Lyy()  # network class supplied by model_self
loss_fn = nn.CrossEntropyLoss()
learning_rate = 1e-2
optimizer = torch.optim.SGD(lyy.parameters(), lr=learning_rate)

total_train_step = 0  # optimization steps taken so far
total_test_step = 0   # evaluation rounds completed so far
epoch = 10

writer = SummaryWriter("logs")
for i in range(epoch):
    print("-----第{}轮训练开始了-----".format(i+1))
    # ---- training -----------------------------------------------------
    for data in train_dataloader:
        imgs, targets = data
        output = lyy(imgs)
        loss = loss_fn(output, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_step += 1
        if total_train_step % 100 == 0:
            print("训练次数:{},Loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)
    # ---- evaluation ----------------------------------------------------
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():  # no gradients needed while evaluating
        for data in test_dataloader:
            imgs, targets = data
            output = lyy(imgs)
            loss = loss_fn(output, targets)
            total_test_loss += loss.item()  # .item() avoids accumulating tensors
            # BUG FIX: the original used "output.argmax(1) -- targets" (a double
            # negation, i.e. subtraction); accuracy needs an equality test.
            accuracy = (output.argmax(1) == targets).sum()
            total_accuracy += accuracy
    print("整体测试集上误差:{}".format(total_test_loss))  # fixed typo: 测试机 -> 测试集
    print("整体测试集上的正确率:{}".format(total_accuracy/test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    # BUG FIX: the original logged total_accuracy/total_test_step (division by
    # zero on the first round) and passed no global_step.
    writer.add_scalar("test_accuracy", total_accuracy/test_data_size, total_test_step)
    total_test_step += 1
    # torch.save(lyy, "lyy_{}.pth".format(i))
    # print("模型已保存")
writer.close()
十六、利用GPU加速
第一种方法:
第二种方法:device(“cpu”)or device(“cuda”)
十七、如何使用模型?
import torch
import torchvision
from PIL import Image
from model import *

# Run a saved CIFAR10 model on a single image.
img_path = "test_imgs/1.jpg"
image = Image.open(img_path)
print(image)
image = image.convert('RGB')  # drop any alpha channel so the net sees 3 channels

transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                            torchvision.transforms.ToTensor()])
image = transform(image)
print(image.shape)

# map_location lets a GPU-trained checkpoint load on a CPU-only machine.
model = torch.load("model/tudui_9.pth", map_location=torch.device('cpu'))
print(model)
image = torch.reshape(image, (1, 3, 32, 32))  # add the batch dimension
print(image.shape)

model.eval()
with torch.no_grad():
    # image = image.cuda()
    output = model(image)
print(output)
print(output.argmax(1))
# CIFAR10 class indices:
# 0 airplane, 1 automobile, 2 bird, 3 cat, 4 deer,
# 5 dog, 6 frog, 7 horse, 8 ship, 9 truck
完结撒花(历时一个月,我可真够拖延的)