[Pytorch入门] 土堆课程笔记

from PIL import Image # PIL相当于python官方图像处理库，非常适合于图像归档以及图像的批处理任务

# Show a picture with PIL.
# A raw string keeps every backslash of the Windows path literal; the original
# mixed "\\" with a bare "\h", which is an invalid escape sequence.
img_path = r"E:\pycharm\project\project_test\test_0306\hymenoptera_data\train\ants_image\0013035.jpg"
img = Image.open(img_path)
img.show()

相对引用：

from PIL import Image # PIL相当于python官方图像处理库，非常适合于图像归档以及图像的批处理任务

# Show a picture with PIL, this time addressed by a relative path.
img_path ="hymenoptera_data/train/ants_image/0013035.jpg"
img = Image.open(img_path)
img.show()

输出为：

6.2 完整代码：

代码写的挺清楚的，也从cv2和pil两种方式展示了图片

from torch.utils.data import Dataset    # 从torch.的工具区（utils）中的data区获取数据集
from PIL import Image # PIL相当于python官方图像处理库，非常适合于图像归档以及图像的批处理任务
import cv2
import os

# Show the picture with PIL (left disabled; the cv2 variant below is used).
# A raw string keeps every backslash of the Windows path literal; the original
# mixed "\\" with a bare "\h", which is an invalid escape sequence.
img_path = r"E:\pycharm\project\project_test\test_0306\hymenoptera_data\train\ants_image\0013035.jpg"
# img = Image.open(img_path)
# img.show()

# Load the same picture with cv2 instead of Image.open(img_path).
img = cv2.imread(img_path)
def cv2_show_cv_image(image):
    """Display an image that was loaded with cv2 in a resized window.

    Blocks until a key is pressed, then closes the window.

    Args:
        image: Image to display, as returned by ``cv2.imread``.
    """
    cv2.namedWindow('image', 0)  # create a named window (flag 0 kept from the original)
    cv2.resizeWindow('image', 600, 500)  # keep large pictures from overflowing the screen
    # Show the argument; the original displayed the module-level ``img`` and
    # silently ignored the ``image`` parameter.
    cv2.imshow('image', image)
    cv2.waitKey(0)  # wait for a key press, otherwise the window only flashes by
    cv2.destroyWindow('image')  # close just this window
    # cv2.destroyAllWindows()  # would close every window opened by cv2
    # cv2.imwrite('cat.png', image)  # would save the picture with cv2


# Print the current working directory.
current_path = os.getcwd()
print(current_path)

# Put the names of all entries of the folder into a list and pick them by index.
dir_path ="hymenoptera_data/train/ants_image"  # relative path
img_path_list = os.listdir(dir_path)   # listdir turns everything inside the folder into a list of names
img_path_list[1]  # bare expression: shows a value only in an interactive console, no effect in a script

class Mydata(Dataset):
    """Folder-backed dataset: every entry of ``root_dir/label_dir`` is one sample.

    The folder name (``label_dir``) doubles as the label of every sample.
    """

    def __init__(self, root_dir, label_dir):
        """Remember the folder location and list its entries.

        Args:
            root_dir: Directory that contains the per-label folders.
            label_dir: Folder name inside ``root_dir``; also used as the label.
        """
        # Attributes set on ``self`` are visible to every other instance method.
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(self.root_dir, self.label_dir)
        # Scan the directory once. The original called os.listdir twice, which
        # costs a second scan and lets the two lists disagree if the folder
        # changes in between. ``listdir`` stays a separate list object.
        self.img_path = os.listdir(self.path)
        self.listdir = list(self.img_path)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the entry at position ``idx``.

        The image is opened with ``Image.open``; the label is ``label_dir``.
        """
        img_name = self.img_path[idx]
        # self.path is already root_dir/label_dir, so reuse it.
        img_item_path = os.path.join(self.path, img_name)
        img = Image.open(img_item_path)
        label = self.label_dir
        return img, label

    def __len__(self):
        """Return the number of entries found in the folder."""
        return len(self.img_path)


root_dir = "hymenoptera_data/train"
ants_label_dir = "ants_image"
bees_label_dir = "bees_image"
ants_dataset = Mydata(root_dir,ants_label_dir)
bees_dataset = Mydata(root_dir,bees_label_dir)
train_dataset = ants_dataset + bees_dataset
len(train_dataset)    # the two datasets are concatenated, so the length is the sum of both (bare expression: no output in a script)
print(len(train_dataset))
img,label = train_dataset[220]   # index into the combined dataset; change the index to fetch a different sample
img.show()

6.3 补充（class类）

6.3.1 class实例化

class student:
    # Class-level attributes: defaults that instances see until they assign their own value.
    name = None
    age = None
    contourpy = "中国"  # NOTE(review): the name looks like a typo for "country" — confirm

# Instantiate the class, then set attributes on that instance.
stu_1 = student()
stu_1.name = "xixi"
stu_1.age = 18
print(stu_1.name)
print(stu_1.age)
print(stu_1.contourpy)  # never assigned on the instance, so the class-level value is printed

6.3.2 class调用

class student:
    """Demo class: two class-level attributes plus two greeting methods."""

    name = None
    age = None

    def say_hi(self):
        """Print a greeting that contains this instance's name."""
        greeting = f"我是{self.name},很高兴认识你"
        print(greeting)

    def say_hello(self, msg):
        """Print this instance's name followed by ``msg``."""
        line = f"我是{self.name},{msg}"
        print(line)


# Instantiate, set the name on the instance, then call both methods.
std1 = student()
std1.name = "xixi"
std1.say_hi()
# ``self`` is supplied automatically, so only ``msg`` has to be passed.
std1.say_hello("加一")

6.3.3 class中的__call__

class Person:
    """Demo of ``__call__``: instances can be invoked like functions."""

    def __call__(self, name):
        """Run when the instance itself is called, e.g. ``person("haha")``."""
        text = "__call" + name
        print(text)

    def hello(self, name):
        """Ordinary method, invoked as ``person.hello(...)``."""
        text = "hello" + name
        print(text)


# Objects that can be executed by appending () are called callable objects:
# user-defined functions, built-in functions, instances and bound methods.
# __call__() is the special method behind this: a class that defines it makes
# its instances callable, and calling an instance runs the code in __call__().
person = Person()       # instantiate
person("haha")          # calling the instance dispatches to __call__
person.hello("xixi")    # regular method call

6.3.4 rename填充数据集

# Fill the dataset: write one label file per image.
import os

root_dir ="hymenoptera_data/train"
target_dir = "ants_image"
# os.listdir(path) returns a list holding the names of the entries found in
# ``path`` (here hymenoptera_data/train/ants_image).
img_path = os.listdir(os.path.join(root_dir,target_dir))
print(img_path)

# The label is the part of the folder name before the first "_":
# "ants_image" -> "ants".
label = target_dir.split('_')[0]
print(label)
out_dir = "ants_label"
# open(..., 'w') does not create missing folders, so create the output folder
# first; the original raised FileNotFoundError when it did not exist yet.
os.makedirs(os.path.join(root_dir, out_dir), exist_ok=True)
for i in img_path:
    # splitext drops only the final extension, whatever it is; split('.jpg')
    # left other extensions (e.g. ".JPG", ".png") inside the file name.
    file_name = os.path.splitext(i)[0]
    with open(os.path.join(root_dir,out_dir,"{}.txt".format(file_name)),'w') as f:
        f.write(label)
"""
with open(r'filename.txt') as f:
    for l in f:
        l = json.loads(l)  #文件的读操作

with open('Hello.txt', 'w') as f:
   f.write('hello world')  #文件的写操作,w以写方式打开
"""

6.4 rename填充数据集

# Fill the dataset: write one label file per image (absolute-path variant).
import os

root_dir ="E:\\pycharm\\project\\project_test\\test_0306\\hymenoptera_data\\train"
target_dir = "ants_image"
# os.listdir(path) returns a list holding the names of the entries found in
# ``path`` (here <root_dir>\ants_image).
img_path = os.listdir(os.path.join(root_dir, target_dir))
# print(img_path)

# The label is the part of the folder name before the first "_":
# "ants_image" -> "ants".
label = target_dir.split('_')[0]
print(label)
out_dir = "ants_label"

# open(..., 'w') does not create missing folders, so create the output folder
# first; the original raised FileNotFoundError when it did not exist yet.
os.makedirs(os.path.join(root_dir, out_dir), exist_ok=True)
for i in img_path:
    # splitext drops only the final extension, whatever it is; split('.jpg')
    # left other extensions (e.g. ".JPG", ".png") inside the file name.
    file_name = os.path.splitext(i)[0]
    with open(os.path.join(root_dir, out_dir, "{}.txt".format(file_name)), 'w') as f:
        f.write(label)

"""
with open(r'filename.txt') as f:
    for l in f:
        l = json.loads(l)  #文件的读操作

with open('Hello.txt', 'w') as f:
   f.write('hello world')  #文件的写操作,w以写方式打开
"""

P7-8 tensorboard的使用

用tensorboard的时候命名文件里不要出现中文，不要出现空格！！！

7.1 tensorboard的安装：

在终端里

pip install tensorboard

7.2 tensorboard使用

SummaryWriter：就相当于文件夹，把自己想存的东西放进去。

writer.add_image()：向日志中写入一张图片，第一个参数为标题（tag），第二个是图像数据（img_tensor），第三个是步数（global_step）

writer.add_scalar():绘制折线图，第一个参数为标题，第二个是y轴，第三个是x轴

7.3 完整代码

from torch.utils.tensorboard import SummaryWriter


writer = SummaryWriter("xixi")
# add_image(self, tag, img_tensor, global_step=None, walltime=None, dataformats='CHW')
#   tag (string): data identifier, i.e. the title of the image
#   img_tensor (torch.Tensor, numpy.array, or string/blobname): the image data
#   global_step (int): global step value to record (the training step)
#   walltime (float): optional override of the default walltime (time.time()),
#       seconds after epoch of event; rarely needed
#   dataformats: layout of img_tensor, 'CHW' unless stated otherwise
#
# ``tag`` and ``img_tensor`` have no default, so the bare ``writer.add_image()``
# call that stood here raised TypeError. A working call looks like
#   writer.add_image("data_read", img_array, 1, dataformats='HWC')
writer.close()

# Read a picture and display it in TensorBoard.
import numpy as np
from torch.utils.tensorboard import SummaryWriter
img_path = "hymenoptera_data/train/ants_image/67270775_e9fdf77e9d.jpg"

# from PIL import Image
# img_PIL = Image.open(img_path)
# print(type(img_PIL))

import cv2
img_cv2 = cv2.imread(img_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if img_cv2 is None:
    raise FileNotFoundError(img_path)
print(type(img_cv2))
print(img_cv2.shape)
# cv2 stores the channels as BGR; convert to RGB so TensorBoard shows the
# picture in its true colours.
img_rgb = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)

# import numpy as np
# img_np = np.array(img_path)
# print(type(img_np))

writer = SummaryWriter("xixi")
# The array is laid out height x width x channel (channels last), which has to
# be declared because add_image assumes 'CHW' by default.
writer.add_image("data_read", img_rgb, 2, dataformats='HWC')

# y = 2x
# Draw a curve with TensorBoard.
# add_scalar(tag, scalar_value, global_step=None):
#   tag (string): data identifier, the chart title
#   scalar_value (float or string/blobname): value to save, the y axis
#   global_step (int): global step value to record, the x axis
for i in range(100):
    # The value must match the title; the original logged 3*i under "y=2x".
    writer.add_scalar("y=2x", 2 * i, i)
    # If the curve loops back on itself, old events with the same tag are being
    # mixed in. Either delete the files in the log folder and rerun, or log
    # into a new folder: SummaryWriter("new_folder").
writer.close()

运行后左边会出现一个xixi的文件夹（每运行一次就会产生一个文件）

运行后打开终端，输入： tensorboard --logdir=xixi

点击弹出来的网址

PS E:\pycharm\tudui_test\test_0306> tensorboard --logdir=xixi
TensorFlow installation not found - running with reduced feature set.
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.10.0 at http://localhost:6006/ (Press CTRL+C to quit)

7.4 一些使用tensorboard的tips：

①不改变标题只修改里面的参数，就会在原图表里直接绘制，就是说会有回环或者重叠，解决：

把对应的logs下的所有文件都删掉再重新开始
创建一个新文件，SummaryWriter（”新文件夹“）

②tensorboard为了加速，默认只显示部分采样数据；如果想看到更多图片，只需要在启动tensorboard可视化命令时添加参数--samples_per_plugin，例如 --samples_per_plugin=images=10000000 表示最多显示10000000张图片，所以这个值尽可能大一点就好。

P9 Transform的使用（一)

Transform相当于一个工具箱，按住ctrl点击，可以看到官方解释说明

一个个class相当于具体的工具，点结构会看到各种工具，如下

工具就可以使用上述的所有

9.1 transforms.ToTensor

通过transforms.ToTensor去看两个问题
 1、transform该如何使用（python）
 2、为什么需要tensor数据

transform该如何使用（python）：

from torchvision import transforms
from PIL import Image

# Two questions are explored through transforms.ToTensor:
# 1. How is a transform used (in Python)?
# 2. Why is the tensor data type needed?

# Absolute path: E:\pycharm\tudui_test\test_0306\hymenoptera_data
# Relative path: hymenoptera_data/train/ants_image/0013035.jpg
img_path = "hymenoptera_data/train/ants_image/0013035.jpg"  # relative path: an absolute Windows path contains backslashes that would need escaping

# Read the picture with PIL
img = Image.open(img_path)
print(img)    # <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=768x512 at 0x1883790A760>

# 1. How is a transform used (in Python)?
trans_tensor = transforms.ToTensor()(img)
print(trans_tensor)
# Using a transform:
# ToTensor() creates the tool from the toolbox; the second pair of parentheses calls it on the image
    # Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor (HWC) -> (CHW)

"""
tensor([[[0.3137, 0.3137, 0.3137,  ..., 0.3176, 0.3098, 0.2980],
         [0.3176, 0.3176, 0.3176,  ..., 0.3176, 0.3098, 0.2980],
         [0.3216, 0.3216, 0.3216,  ..., 0.3137, 0.3098, 0.3020],
"""

P10 Transform的使用（二）

tensor里包装了神经网络所需要的一些参数，把代码复制进控制台或者debug。

可以看到右边的一些参数

尝试一下cv2

在终端里安装 pip install opencv-python

!!注意一下，imread是无法读取包含中文的路径的，用绝对路径的小伙伴注意一下

P12 常见的Transforms（一）

12.1 call

Python中那些能够在后面加()来调用执行的对象，被称为可调用对象。可调用对象包括自定义函数、Python内置函数、实例对象和实例方法等。

call()方法是Python中一个很特殊的方法。凡是可调用对象，都可以通过调用__call__()方法来调用该对象，如果类中定义了__call__()方法，那么该类的实例对象也将成为可调用对象。该对象被调用时，将执行__call__()方法中的代码。

class Person:
    """Demo of ``__call__``: instances can be invoked like functions."""

    def __call__(self, name):
        """Run when the instance itself is called, e.g. ``person("haha")``."""
        message = "__call" + name
        print(message)

    def hello(self, name):
        """Ordinary method, invoked as ``person.hello(...)``."""
        message = "hello" + name
        print(message)

# 实例对象被调用时，将执行__call__()方法中的代码。
person = Person()   # 实例化
person("haha")      # 内置__call__ 可以直接对象名+属性，参数
person.hello("xixi")   #

12.2 transforms的使用

import cv2
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

img_path = "images/5a59b02cf81e017531d1701aa6b2e66.jpg"
img = Image.open(img_path)
# print(img)
# print(type(img))    # img comes from Image.open, so this is a PIL image class (numpy.ndarray only applies to cv2.imread)
writer = SummaryWriter("transforms_use")

# Using ToTensor: create the transform, then call it on the image
trans_totensor = transforms.ToTensor()(img)
# print(type(trans_totensor))    # <class 'torch.Tensor'>
writer.add_image("transforms_use",trans_totensor)

writer.close()

！！ img = cv2.imread(img_path) 读取的图片与原图不一致，下面输出的tensor数据也不一样

12.3 Normalize使用

import cv2
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

img_path = "images/5a59b02cf81e017531d1701aa6b2e66.jpg"
img = Image.open(img_path)
# img.show()
print(img.mode)
# print(img)
# print(type(img))    # numpy.ndarray
writer = SummaryWriter("use_transforms")

# ToTensor的使用
trans_totensor = transforms.ToTensor()(img)
# print(type(trans_totensor))    # <class 'torch.Tensor'>
writer.add_image("transforms_use",trans_totensor)


# Using Normalize: per channel, output = (input - mean) / std.
# Index with [0][0][0] to print one element (channel 0, row 0, column 0);
# the original ``print(t[0],[0],[0])`` printed the whole first channel
# followed by two literal lists ``[0]``.
print(trans_totensor[0][0][0])
# The picture has three channels, so a mean and a std are given for each one.
trans_norm = transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])
img_norm = trans_norm(trans_totensor)      # normalize a tensor image with mean and standard deviation
print(img_norm[0][0][0])
writer.add_image("img_norm",img_norm,2)
writer.close()

看他的输出，如果transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])，均值和标准差在每个通道上都设为0.5，用数学公式看就相当于output = input*2-1，

tensorboard的结果：

12.4 我有疑问Image.open和cv2.imread打开不一致

img = Image.open(img_path)

img = cv2.imread(img_path)

使用PIL/Pillow的Image.open()和OpenCV的cv2.imread()打开同一张图片时,得到的图像可能会有微妙的差异。这主要有以下几个原因:

默认颜色空间不同:Image.open()默认读取RGB颜色空间的图片,cv2.imread()默认读取BGR颜色空间的图片。这会导致颜色通道的顺序不同。
像素值范围相同:两者读取8位图片时,像素值范围都是0-255(uint8),所以这一点并不是产生差异的原因。
色彩模式不同:Image.open()读取的图片模式可以是RGB、RGBA、L等,cv2.imread()默认(IMREAD_COLOR)总是返回3通道的BGR图像。这会导致色彩表示不同。
图像数据存储不同:Image.open()返回的是PIL的Image对象(需要用np.array()才能转成numpy数组),cv2.imread()在Python中直接返回numpy.ndarray。两者的接口不同。
图像处理手段不同:Image.open()基于PIL/Pillow,cv2.imread()基于OpenCV。两者的图像处理函数和方法都不同。

P13 常见的Transforms（二）

13.1 Resize（）

我的一个小报错~

UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True).
warnings.warn(

按照报错的提示，在resize后加上 antialias=True，就不会有UserWarning

from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
import cv2 as cv
from PIL import Image

writer = SummaryWriter("img_resize")  # NOTE(review): created here but never written to or closed in this snippet

img_path = "images/5a59b02cf81e017531d1701aa6b2e66.jpg"
img = Image.open(img_path)
print(type(img))    # img comes from Image.open, so this is a PIL image class, not numpy.ndarray

# Using Resize()
img_tensor = transforms.ToTensor()(img)  # tensor version of the picture; NOTE(review): not used below, Resize is applied to the PIL ``img``
print(type(img_tensor))  # <class 'torch.Tensor'>

print(img.size)     # size of the original picture
trans_resize = transforms.Resize((512,512),antialias=True)  # target size 512*512; antialias=True silences the UserWarning quoted above
img_resize = trans_resize(img)   # resize the given picture to the requested size
print(type(img_resize))    # NOTE(review): the input is the PIL ``img``, so presumably a PIL image rather than torch.Tensor — confirm
print(img_resize.size)

更改参数会得到不同的结果：

13.2 Compose（）

compose()相当于一个transforms缝合的操作：

from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
import cv2 as cv
from PIL import Image

writer = SummaryWriter("img_resize")

img_path = "images/5a59b02cf81e017531d1701aa6b2e66.jpg"
img = Image.open(img_path)
print(type(img))    # a PIL image class, because img comes from Image.open

# Using Resize()
img_tensor = transforms.ToTensor()(img)  # tensor version of the picture (Resize below is applied to the PIL ``img``)
print(type(img_tensor))  # <class 'torch.Tensor'>

print(img.size)     # size of the original picture
# A single int scales the shorter side to 512 and keeps the aspect ratio;
# pass a (h, w) pair to force an exact 512*512 result.
trans_resize = transforms.Resize(512,antialias=True)
img_resize = trans_resize(img)
print(type(img_resize))
print(img_resize.size)


# Using Compose(): chain several transforms by putting them in one list.
# With a single int argument Resize scales the shorter side to that value and
# keeps the aspect ratio.
trans_resize_2 = transforms.Resize(224)
# The transforms run in list order, so the output type of each one must be an
# accepted input type of the next (here: Resize output feeds ToTensor).
trans_com = transforms.Compose([trans_resize_2,transforms.ToTensor()])
# Keep the result under its own name; the original rebound ``trans_resize_2``
# and thereby replaced the transform object with its output tensor.
img_compose = trans_com(img)
print(img_compose)
writer.add_image("trans_compose",img_compose,5)
# writer.add_image("trans_compose",img_compose,1)

writer.close()

13.3 RandomCrop()

from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

writer = SummaryWriter("randomcrop")
img_path = "images/5a59b02cf81e017531d1701aa6b2e66.jpg"
img = Image.open(img_path)
# RandomCrop
trans_randomcrop = transforms.RandomCrop(512)  # crop size: 512 for both height and width
img_randamcrop = transforms.Compose([trans_randomcrop,transforms.ToTensor()])

for i in range(10):
    img_rancrop = img_randamcrop(img)
    writer.add_image("randomcrop",img_rancrop,i)   # i is the step, so every crop is logged under its own step

writer.close()   # close the writer when done; per the original note, data is otherwise only written out every 120 s

！！关注输入和输出数据类型

！！多看官方文档

！！关注方法需要什么参数（不知道返回值是什么数据类型的时候，print一下~）

P14 Torchvision中Dataset的使用

Datasets — Torchvision 0.15 documentation (pytorch.org)

里面有很多分类好的数据集，每个数据集都有详细的讲解

以cifar10为例：

这个数据集比较小，32*32，所以分辨率不是很高

第一次下载的话可能会比较慢，可以运行复制下面框里的链接用迅雷下载。

import torchvision
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

# Converts every sample of a dataset to a tensor. It only takes effect when it
# is passed to the dataset as CIFAR10(..., transform=dataset_transform); this
# snippet does not pass it, so the samples stay PIL images.
dataset_transform = transforms.Compose([torchvision.transforms.ToTensor()])

# train=True selects the training split, train=False the test split;
# download=True fetches the data from the official site when needed.

train_set = torchvision.datasets.CIFAR10("./dataset_CIFAR10",train=True,download=True)
# The test split needs train=False; the original passed train=True here and
# therefore loaded the training data a second time.
test_set = torchvision.datasets.CIFAR10("./dataset_CIFAR10",train=False,download=False)

# Every sample is an (image, target) pair and target is an index into
# ``classes``. The values originally recorded in this note (target 6, 'frog')
# were produced while test_set still pointed at the training split.
print(test_set[0])    # (<PIL.Image.Image image mode=RGB size=32x32 at 0x...>, target)
print(test_set.classes)  # ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

img,target = test_set[0]
print(img)    # <PIL.Image.Image image mode=RGB size=32x32 at 0x...>
print(target)
print(test_set.classes[target])   # classes is a list; the target index selects the label name
img.show()
print(test_set[0])

用tensorboard可视化一下，完整代码如下：

！！img必须是tensor数据类型

import torchvision
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

# Converts every sample of a dataset to a tensor. It only takes effect when it
# is passed to the dataset as CIFAR10(..., transform=dataset_transform).
dataset_transform = transforms.Compose([torchvision.transforms.ToTensor()])

# train=True selects the training split, train=False the test split;
# download=True fetches the data from the official site when needed.

train_set = torchvision.datasets.CIFAR10("./dataset_CIFAR10",train=True,transform=dataset_transform,download=True)
# The test split needs train=False; the original passed train=True here and
# therefore visualised training images under the name "test_set".
test_set = torchvision.datasets.CIFAR10("./dataset_CIFAR10",train=False,transform=dataset_transform,download=False)

writer = SummaryWriter("test_set")
for i in range(10):
    # Fetch the sample stored at index i of the test split.
    img,target = test_set[i]
    # i is the step; img is a tensor because of transform=dataset_transform.
    writer.add_image("test_set",img,i)

writer.close()

P15 Torchvision中DataLoader的使用

dataset就相当于一摞牌，dataloader就相当于如何抓取牌
# batch_size 是每次抓牌抓几张
# shuffle 每一轮结束后还要不要洗牌
# num_workers 多进程还是单进程加载数据，一般设置为0，因为可能windows下大于0的话可能会报错
# drop_last 100张牌每次取三张，最后一张牌要不要丢弃，True为不要，False为要

import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

trans_tensor = torchvision.transforms.ToTensor()
# The test split is selected with train=False.
test_data = torchvision.datasets.CIFAR10(
    "dataset_CIFAR10",
    train=False,
    transform=trans_tensor,
)


test_loader = DataLoader(
    dataset=test_data,
    batch_size=64,
    shuffle=True,
    num_workers=0,
    drop_last=True,
)

# A CIFAR10 sample is an (img, target) pair.
# Inspect the first picture of the test data and its target.
img, target = test_data[0]
print(img.shape)
print(target)
print(test_data.classes[target])

writer = SummaryWriter("dataloader1")
for epoch in range(2):
    # Each epoch walks through the whole dataset once via the inner loop;
    # with shuffle=True the data is reshuffled before every pass.
    step = 0
    for data in test_loader:
        imgs, targets = data
        # imgs holds one batch stacked into a single tensor and targets the
        # matching labels, e.g. with batch_size=4:
        #   imgs.shape -> torch.Size([4, 3, 32, 32]), targets -> tensor([1, 1, 6, 5])
        # A whole batch is logged with add_images (note the trailing "s").
        writer.add_images("epoch:{}".format(epoch), imgs, step)
        step += 1

writer.close()

P16 nn.Module的使用

import torch
from torch import nn


class Haha(nn.Module):
    """Minimal nn.Module whose forward pass adds 1 to its input."""

    def __init__(self):
        super().__init__()

    def forward(self, input):
        """Return ``input + 1``."""
        return input + 1


haha = Haha()
x = torch.tensor(1.0)
# Calling the module instance runs forward() on the argument.
output = haha(x)
print(output)

P17 浅说卷积

Torch.nn neural network

包括Conv2d，MaxPool2d，ReLU, Linear 等等，后面会逐步介绍。

Containers: 相当于一个容器，给神经网络定义了一些骨架，给里面添加不同的内容就能组成神经网络

Containers下面也有非常多模块，

Module：所有神经网络基本的类，基本上自己写的所有的类都需要继承它（nn.Module)，相当于一个模板，你只需要修改里面的部分内容就好了，刚学习的话就需要debug看它具体每一步走到哪。

Sequential：就是可以整合所有的conv，maxpool2d放在一起，然后整体调用，和compose（）的用法差不多。

练习代码~

import torch
import torch.nn.functional as F

# Build the data as tensors; two levels of nested brackets give a 2-D tensor.
input = torch.tensor([
    [1, 2, 0, 3, 1],
    [0, 1, 2, 3, 1],
    [1, 2, 1, 0, 0],
    [5, 2, 3, 1, 1],
    [2, 1, 0, 1, 1],
])
kernel = torch.tensor([
    [1, 2, 1],
    [0, 1, 0],
    [2, 1, 0],
])
print(input.shape)   # torch.Size([5, 5])
print(kernel.shape)  # torch.Size([3, 3])

# F.conv2d works on 4-D tensors: the input as (batch_size, channel, H, W) and
# the kernel as (out_channels, in_channels, kH, kW).
input1 = input.reshape(1, 1, 5, 5)
kernel1 = kernel.reshape(1, 1, 3, 3)

print(input1.shape)    # torch.Size([1, 1, 5, 5])
print(kernel1.shape)   # torch.Size([1, 1, 3, 3])

# Stride 1: the kernel moves one position at a time.
output1 = F.conv2d(input1, kernel1, stride=1)
print(output1)        # the input is 4-D, so the output is 4-D as well
print(output1.shape)  # torch.Size([1, 1, 3, 3])
print(output1.size)   # without parentheses this prints the bound method: <built-in method size of Tensor object at 0x...>

# Stride 2: the kernel moves two positions at a time.
output2 = F.conv2d(input1, kernel1, stride=2)
print(output2)        # still 4-D

# padding=1 surrounds the input with a border, so the edge values are not
# covered by the kernel only once.
output3 = F.conv2d(input1, kernel1, stride=1, padding=1)
print(output3)

输出：

torch.Size([5, 5])
torch.Size([3, 3])
torch.Size([1, 1, 5, 5])
torch.Size([1, 1, 3, 3])
tensor([[[[10, 12, 12],
          [18, 16, 16],
          [13,  9,  3]]]])
torch.Size([1, 1, 3, 3])
<built-in method size of Tensor object at 0x000001E9FFD6D040>
tensor([[[[10, 12],
          [13,  3]]]])
tensor([[[[ 1,  3,  4, 10,  8],
          [ 5, 10, 12, 12,  6],
          [ 7, 18, 16, 16,  8],
          [11, 13,  9,  3,  4],
          [14, 13,  9,  7,  4]]]])

torch.Size括号中有几个数字就是几维      
torch.Size([1, 1, 3, 3])
tensor([[[[10, 12, 12],
          [18, 16, 16],
          [13,  9,  3]]]])
第一层（最外层）中括号里面包含了1个中括号（以逗号进行分割），这就是([1, 1, 3, 3])中的第一个1
第二层中括号里面包含了1个中括号（以逗号进行分割），这就是([1, 1, 3, 3])中的第二个1
第三层中括号里面包含了三个中括号（以逗号进行分割），这就是([1, 1, 3, 3])中的第一个3
第四层中括号里面包含了三个数（以逗号进行分割），这就是([1, 1, 3, 3])中的第二个3

P18 Conv Layers 卷积层

torch.nn.functional 相当于汽车轮子齿轮的运转(更细致一点）

torch.nn 齿轮封装好，提供了一个方向盘

这个公式还挺重要，后面出现有的网络结构，需要自己算stride和padding。。

N 代表batch_size，每次取多少张数据

minibatch：从训练数据里选出一批数据

padding：可以避免input图片边缘的数据只计算一次，默认 padding_mode = 'zeros'（即用0填充）

kernel_size = 3：卷积核大小是3*3

代码来咯！

import torchvision.transforms
from PIL.Image import Image
from torch.nn import Conv2d
from torch import nn
from torch.utils import tensorboard
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision.transforms import transforms
import torch

# Raw string: the original "..\CIFAR10" contained the invalid escape sequence
# "\C"; the resulting path value is unchanged.
dataset = torchvision.datasets.CIFAR10(r"..\CIFAR10",train=False,transform=torchvision.transforms.ToTensor(),download=True)
dataloader = DataLoader(dataset,batch_size=64)

class Haha(nn.Module):
    """Network with one 3x3 convolution that maps 3 channels to 6.

    Tip from the original note: do not name your own class after something
    that already exists in torch.
    """

    def __init__(self):
        super().__init__()
        self.conv2 = Conv2d(3, 6, kernel_size=3, stride=1)

    def forward(self, x):
        """Apply the convolution to ``x`` and return the result."""
        return self.conv2(x)

conv2 = Haha()
print(conv2)

# Run every batch of pictures through the network and log the results.
writer = SummaryWriter("conv2d")
step = 0
for data in dataloader:
    imgs,targets = data   # already tensors (the dataset uses ToTensor)
    output = conv2(imgs)  # use the instance created above, not the class name
    print(imgs.shape)
    print(output.shape)  # torch.Size([64, 6, 30, 30]); a 6-channel picture cannot be displayed directly in TensorBoard
    output = torch.reshape(output, (-1, 3, 30, 30))   # -1 lets torch infer the batch size from the other values: fewer channels, more pictures in the batch
    writer.add_images("conv2_output", output, step)
    step += 1

writer.close()

# The output size can be derived with the shape formula in the official Conv2d documentation.
# Same height/width but more channels presumably means more kernels were used.

输出：

Haha(
  (conv2): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
)
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])
torch.Size([64, 3, 32, 32])
torch.Size([64, 6, 30, 30])

P23 损失函数与反向传播

23.1 损失函数

23.1.1 torch.nn.L1Loss

损失函数当然越小越好

① 计算实际输出与目标输出的差距

② 为我们更新提供一定的依据（反向传播）

torch.nn.L1Loss

输入x：1，2，3 目标y：1，2，5

L1Loss： $\frac{|1-1|+|2-2|+|3-5|}{3}$ =0.667（保留三位小数）

23.1.2 torch.nn.MSELoss 均方误差

公式很好懂，shape也没啥子要注意的地方

23.1.3 torch.nn.CrossEntropyLoss 交叉熵

官网的公式太难懂了。。。

信息量：一个事件发生的可能性越小，信息量就越大

熵：信息量的期望值（所有取值的信息量的期望）

一个随机变量的取值越不确定，熵越大，反之越小。

交叉熵：两个概率分布之间的距离 = 通过概率分布q来表达概率分布p的困难程度

p为正确答案，q代表的是预测值，交叉熵越小，2个概率分布越接近

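这部分一般要先经过softmax一下，使神经网络的输出变为一个概率分布，再使用交叉熵来计算预测的概率分布和真实值概率分布之间的距离。注意：PyTorch的nn.CrossEntropyLoss内部已经包含了softmax（LogSoftmax + NLLLoss），所以传入的应该是网络的原始输出（logits），不需要自己再做一次softmax。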

上图来自B站up主霹雳吧啦Wz图像处理篇课件~

这三部分的代码如下：

import torch
from torch.nn import L1Loss, MSELoss, CrossEntropyLoss

# Both tensors are created as float32, directly in the (1, 1, 1, 3) shape
# the example works with.
inputs = torch.tensor([[[[1, 2, 3]]]], dtype=torch.float32)
targets = torch.tensor([[[[1, 2, 5]]]], dtype=torch.float32)

# L1 loss
# L1Loss(reduction='sum') would add up the individual errors -> tensor(2.)
# reduction='mean' averages them and equals the default -> tensor(0.6667)
loss = L1Loss(reduction='mean')
r = loss(inputs, targets)
print(r)

# Mean squared error: the sum of the squared differences divided by the count
loss_mseloss = MSELoss()
result = loss_mseloss(inputs, targets)    # tensor(1.3333)
print(result)

# Cross entropy: one row of three class scores, target class index 1
x = torch.tensor([[0.1, 0.2, 0.3]])   # shape (1, 3)
y = torch.tensor([1])                 # the target has to be a tensor
loss_cross = CrossEntropyLoss()
result2 = loss_cross(x, y)
print(result2)      # tensor(1.1019)

输出：

tensor(0.6667)
tensor(1.3333)
tensor(1.1019)

23.2 反向传播

反向传播：尝试如何调整参数才会导致最终的Loss变小。

从loss开始推到参数，和网络的顺序相反，所以是反向传播。

梯度就是损失对各个参数求（偏）导数，让参数沿着梯度的反方向（负梯度方向）一步步更新，loss就会逐渐减小，趋近于（局部）最小值。

import torch
import torchvision
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear, CrossEntropyLoss
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("cifar10",train=False,download=True,
                                       transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=1)

class Haha(nn.Module):
    """CIFAR10-sized classifier: three conv/pool stages, then two linear layers."""

    def __init__(self) -> None:
        super().__init__()
        layers = [
            Conv2d(3, 32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10),
        ]
        self.model = Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the 10 class scores."""
        return self.model(x)

# Measure the gap between the actual output and the target.
haha = Haha()
loss = CrossEntropyLoss()
for data in dataloader:
    imgs, targets = data
    # print(imgs)
    # print(targets)
    outputs = haha(imgs)   # run the batch through the model first; without outputs there is nothing to compute a loss on
    result = loss(outputs,targets)
    result.backward()    # back-propagation: grad is None before this line runs and holds values afterwards; NOTE(review): gradients are never zeroed in this loop — fine for a demo, confirm before reusing
    print(result)    # tensor(2.3552, grad_fn=<NllLossBackward0>), the result of one step

输出部分结果为：

tensor(2.1898, grad_fn=<NllLossBackward0>)
tensor(2.3114, grad_fn=<NllLossBackward0>)
tensor(2.3232, grad_fn=<NllLossBackward0>)
tensor(2.3348, grad_fn=<NllLossBackward0>)
tensor(2.4124, grad_fn=<NllLossBackward0>)
tensor(2.4264, grad_fn=<NllLossBackward0>)
tensor(2.3046, grad_fn=<NllLossBackward0>)
tensor(2.4234, grad_fn=<NllLossBackward0>)

p24 优化器

根据损失梯度进行参数调整，达到降低loss的目的，整体思想就是input数据进模型里跑一趟，输出output，通过上节的损失函数计算损失梯度，loss.backward()进行反向传播，optim.step（）每个参数进行调优，循环里每次的梯度都不相关，所以每一次调节都要清零optim.zero_grad()

如果只训练一轮，就是说数据集里所有的数据都只看了一次，通常需要训练上千上万轮，初学阶段仅以20轮为例，将每一轮所有损失和相加，可以看到loss在降低。

选择合适的优化器，只需要设置模型参数和学习率即可，后面的先采用默认值，若训练模型有需要再自行设置，学习率lr设置太大不稳定，太小训练速度又很慢，所以一般训练刚开始设置大一点，后面再调小。

import torch
import torchvision
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear, CrossEntropyLoss
from torch.optim import optimizer
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("cifar10",train=False,download=True,
                                       transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=64)

class Haha(nn.Module):
    """CIFAR10-sized classifier: three conv/pool stages, then two linear layers."""

    def __init__(self) -> None:
        super().__init__()
        feature_layers = [
            Conv2d(3, 32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            MaxPool2d(2),
        ]
        head_layers = [
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10),
        ]
        self.model = Sequential(*feature_layers, *head_layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the 10 class scores."""
        scores = self.model(x)
        return scores

haha = Haha()
loss_cross = CrossEntropyLoss()
# Pick an optimizer and hand it the model parameters and the learning rate (lr).
# A learning rate that is too large makes training unstable, one that is too
# small makes it slow; training usually starts with a larger lr and lowers it
# later on.
optim = torch.optim.SGD(haha.parameters(), lr=0.01)     # other arguments keep their defaults
for epoch in range(20):
    running_loss = 0.0         # sum of the batch losses of this epoch
    for data in dataloader:      # one pass sees every sample once, hence several epochs
        imgs, targets = data    # read the next batch
        output = haha(imgs)     # run the batch through the model
        result_loss = loss_cross(output, targets)   # loss of this batch
        optim.zero_grad()       # reset the gradient of every trainable parameter
        result_loss.backward()    # back-propagation fills in the gradients the optimizer needs
        optim.step()            # update every parameter from its gradient
        # Accumulate the plain Python number: adding the loss tensor itself
        # (as the original did) keeps each batch's autograd graph alive for
        # the whole epoch. The printed total is therefore a float rather than
        # tensor(..., grad_fn=<AddBackward0>).
        running_loss += result_loss.item()
    print(running_loss)          # total loss of the epoch

输出：

Files already downloaded and verified
tensor(360.8012, grad_fn=<AddBackward0>)
tensor(357.8290, grad_fn=<AddBackward0>)
tensor(347.7727, grad_fn=<AddBackward0>)
tensor(322.9131, grad_fn=<AddBackward0>)
tensor(311.0620, grad_fn=<AddBackward0>)
tensor(301.3166, grad_fn=<AddBackward0>)
tensor(291.4036, grad_fn=<AddBackward0>)
tensor(283.9122, grad_fn=<AddBackward0>)
tensor(276.8408, grad_fn=<AddBackward0>)
tensor(270.4795, grad_fn=<AddBackward0>)
tensor(264.6440, grad_fn=<AddBackward0>)
tensor(259.1889, grad_fn=<AddBackward0>)
tensor(254.0679, grad_fn=<AddBackward0>)
tensor(249.3576, grad_fn=<AddBackward0>)
tensor(245.0429, grad_fn=<AddBackward0>)
tensor(241.1004, grad_fn=<AddBackward0>)
tensor(237.3984, grad_fn=<AddBackward0>)
tensor(233.8950, grad_fn=<AddBackward0>)
tensor(230.5357, grad_fn=<AddBackward0>)
tensor(227.2878, grad_fn=<AddBackward0>)

可以打断点观察module -> weight data和grad的变化：

刚开始data从dataloader里取值，grad = 0，经过一次result_loss.backward()，会计算出新的梯度值grad，每次经过一个新的循环，data重新取值，optim.zero_grad()将梯度置为0，再重新计算grad值。

P25 现有模型修改（vgg16为例）

25.1 vgg16

pretrained：是否预训练，pretrained=True，网络模型的参数在数据集上已经训练好了，pretrained=False，使用的就是初始化的参数。

progress：是否在运行的框下面显示进度条，progress=True显示，progress=False不显示

25.2 imagenet

完整的 ImageNet 约有1500万张图片；torchvision 使用的 ILSVRC-2012 子集约有128万张训练图片，分为1000个类别

本来想和之前一样，用cifar10验证一下相关的数据集，但是用之前的方法已经无法在pytorch里直接下载了，太大了，有需要的可以去官网直接下载放在相关文件路径下面。

这几个参数也比较熟悉了

root：文件存放路径

split：split = 'train' 为训练集

transform :把数据集里的数据转为tensor类型

target_transform: 把目标的数据转为tensor数据类型

loader：在给定路径下加载数据

import torchvision
from torch import nn

# The ImageNet dataset can no longer be downloaded through torchvision;
# download it manually and place it under the root directory first:
# dataset = torchvision.datasets.ImageNet("imagenet", split='train')

# `pretrained` selects whether trained parameters are loaded.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; the original keyword is kept here so older installs keep working.
vgg16_false = torchvision.models.vgg16(pretrained=False)  # architecture only, default-initialised parameters, nothing is downloaded
vgg16_true = torchvision.models.vgg16(pretrained=True)   # downloads the parameters (conv layers etc.) trained on ImageNet
print("okk")
print(vgg16_true)  # unmodified model: this is the first VGG(...) dump in the listed output

# Append a layer to the VGG module itself (it is listed after `classifier`).
vgg16_true.add_module('new_linear', nn.Linear(1000, 10))
# Append a layer to the end of the `classifier` Sequential.
vgg16_true.classifier.add_module('new_linear', nn.Linear(1000, 10))
print(vgg16_true)

# An existing layer can also be replaced in place to change its input/output size.
vgg16_false.classifier[6] = nn.Linear(in_features=4096, out_features=10)
print(vgg16_false)

输出：

okk
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace=True)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace=True)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace=True)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace=True)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace=True)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace=True)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace=True)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace=True)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace=True)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace=True)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace=True)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace=True)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
    (new_linear): Linear(in_features=1000, out_features=10, bias=True)
  )
  (new_linear): Linear(in_features=1000, out_features=10, bias=True)
)
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace=True)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace=True)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace=True)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace=True)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace=True)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace=True)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=10, bias=True)
  )
)

进程已结束,退出代码0

P26 模型的保存与加载

26.1 模型的保存

方法一：保存模型的结构和参数

import torch
import torchvision

vgg16 = torchvision.models.vgg16(pretrained=False)    # build the VGG16 network (no pretrained parameters)
# Save method 1: store the model structure together with its parameters
torch.save(vgg16,"vgg16_method1.pth")

方法二：以字典的形式只保存了模型的参数

import torch
import torchvision
# Save method 2: store only the model parameters (officially recommended; the file is slightly smaller)
# NOTE: `vgg16` is the model object created in the save-method-1 snippet.
torch.save(vgg16.state_dict(),"vgg16_method2.pth")  # state_dict() is saved as a dictionary

不管是哪种方法，保存的文件都会以指定的文件名出现在左边的项目目录里

26.2 模型的加载：

26.2.1 以方法一加载：

import torch
import torchvision

# Save method 1 -> load method 1: torch.load returns the whole model object.
# NOTE(review): torch.load unpickles the file, so only load files you trust.
# NOTE(review): recent PyTorch releases default to weights_only=True, in which
# case loading a whole pickled model needs weights_only=False — confirm against
# the installed version.
model = torch.load("vgg16_method1.pth")
print(model)

输出：

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace=True)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace=True)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace=True)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace=True)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace=True)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace=True)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)

26.1.1 方法一存在缺陷：

如果保存的不是 torchvision 里现有的模型，而是自己写的模型，那么在另一个文件里直接加载时就会出现报错：

自己的模型及保存文件格式为：

import torch
from torch import nn
from torch.nn import Conv2d

class Haha(nn.Module):
    """Minimal example network: one 5x5 convolution from 3 to 32 channels."""

    def __init__(self) -> None:
        super().__init__()
        self.conv1 = Conv2d(3, 32, kernel_size=5)

    def forward(self, x):
        """Apply the convolution to ``x`` and return the result."""
        return self.conv1(x)

haha = Haha()
torch.save(haha, "haha.pth")   # saves the whole model object; "haha.pth" is the output path

新建python文件：

import torch

# Haha is neither defined nor imported in this file, so this load raises an error
model = torch.load("haha.pth")
print(model)

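会出现报错（类似 AttributeError: Can't get attribute 'Haha' ...）：用方法一保存的文件里只记录了模型所属的类名，加载时必须能在当前文件里找到 Haha 这个类的定义。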

解决方法为：

把自己写的网络模型class类在加载页面再复制一次

import torch
from torch import nn
from torch.nn import Conv2d

class Haha(nn.Module):
    """Copy of the saved model's class: a single 5x5 convolution, 3 -> 32 channels."""

    def __init__(self) -> None:
        super().__init__()
        self.conv1 = Conv2d(3, 32, kernel_size=5)

    def forward(self, x):
        """Return the convolution of ``x``."""
        return self.conv1(x)

# With the Haha class defined in this file, the saved model object can be loaded
model = torch.load("haha.pth")
print(model)

另一种方法：不复制类的定义，而是在加载文件的开头把保存模型的那个文件（这里叫 model_save.py）里的内容全部导入进来：

from model_save import *


# Presumably the star-import from model_save brings the Haha class into scope,
# which is what lets this load succeed — verify against model_save.py
model = torch.load("haha.pth")
print(model)

输出结果如上

26.2.2 以方法二加载

import torch
import torchvision

# Save method 2 -> load method 2: torch.load alone returns the saved parameter dictionary, not a model
model = torch.load("vgg16_method2.pth")
print(model)

输出的还是字典形式：

OrderedDict([('features.0.weight', tensor([[[[-0.0323,  0.0177,  0.0042],
          [-0.0171, -0.0156,  0.0780],
          [-0.0617,  0.0712, -0.0670]],

         [[ 0.0193, -0.0177,  0.0217],
          [ 0.0202,  0.0395, -0.0228],
          [ 0.0152, -0.0815,  0.0166]],

         [[ 0.0769, -0.0474,  0.0711],
          [ 0.0329, -0.0049, -0.0085],
          [ 0.0684, -0.0518,  0.0332]]],

把保存为字典型的恢复为网络模型：

import torch
import torchvision

# Save method 2 -> load method 2: restore a full model from the saved dictionary
vgg16 = torchvision.models.vgg16(pretrained=False)    # build the network structure first
vgg16.load_state_dict(torch.load("vgg16_method2.pth"))    # then load the saved parameters into it
# model = torch.load("vgg16_method2.pth")   # on its own this only yields the dictionary
print(vgg16)

输出的结果为：

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace=True)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace=True)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace=True)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace=True)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace=True)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace=True)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)

进程已结束,退出代码0

P 27 完整的训练套路（一）

27.1 训练套路总结

创建dataset数据集，分为训练集和测试集
用dataloader进行数据加载
用len（）查看数据集长度，比如cifar10 训练集和测试集各有多少张图片
自己写的模型model model文件和train文件要在同一个目录下，from model import *
写损失函数 loss
选择合适的优化器
设置网络训练参数
调用haha.train()让网络进入训练状态（传数据，过模型得到output，loss，梯度置零，反向传播，打印~）
测试步骤开始haha.eval() with torch.no_grad()测试不需要对梯度进行调整，也不需要进行优化（取数据，过模型，得误差，得准确率）
展示效果（tensorboard、print、torch.save()）

采用cifar 10模型

27.2 model文件：

import torch
from torch import nn

class Haha(nn.Module):
    """CIFAR-10 style classifier: three conv + max-pool stages, then two linear layers."""

    def __init__(self) -> None:
        super().__init__()
        # Layers are listed in execution order and wrapped in one Sequential.
        layers = [
            nn.Conv2d(3, 32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 64),  # 64 channels of 4x4 feature maps after flattening
            nn.Linear(64, 10),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.model(x)

 # 验证自己的模型是否正确
if __name__ == '__main__':
    # Sanity check: push a batch of 64 all-ones 3x32x32 inputs through the
    # model and print the shape of what comes out.
    haha = Haha()
    dummy_batch = torch.ones((64, 3, 32, 32))
    print(haha(dummy_batch).shape)

27.3 train文件：

import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from model import *


# Pick the device: use the GPU when one is available, otherwise fall back to
# the CPU instead of failing on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Prepare the datasets
dataset_train = torchvision.datasets.CIFAR10("cifar10", train=True, download=True, transform=torchvision.transforms.ToTensor())
dataset_test = torchvision.datasets.CIFAR10("cifar10", train=False, download=True, transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset_train, batch_size=64)

# Report the dataset sizes
train_len = len(dataset_train)
test_len = len(dataset_test)
print("训练集的长度为：{}".format(train_len))
print("测试集的长度为：{}".format(test_len))

# Build the model and move it to the chosen device
haha = Haha()
haha = haha.to(device)

# Loss function
loss = nn.CrossEntropyLoss()
loss = loss.to(device)

# Optimizer
optim = torch.optim.SGD(haha.parameters(), lr=1e-2)

# Number of passes over the training set
epoch = 20

# Training loop
for i in range(epoch):
    print("-----第{}轮训练开始-----".format(i+1))
    running_loss = 0.0  # sum of the batch losses within this epoch
    for data in dataloader:
        imgs, targets = data
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = haha(imgs)
        result_loss = loss(outputs, targets)

        # One optimisation step: clear old gradients, back-propagate, update
        optim.zero_grad()
        result_loss.backward()
        optim.step()
        # Accumulate a plain Python float. Adding the loss tensor itself keeps
        # autograd history attached to running_loss for the whole epoch (the
        # printed total then shows grad_fn=<AddBackward0>).
        running_loss = running_loss + result_loss.item()
    # Prints a float (e.g. 1700.0593...) rather than a tensor(...) repr
    print(running_loss)

输出结果为：

cuda
Files already downloaded and verified
Files already downloaded and verified
训练集的长度为：50000
测试集的长度为：10000
-----第1轮训练开始-----
tensor(1700.0593, device='cuda:0', grad_fn=<AddBackward0>)
-----第2轮训练开始-----
tensor(1440.8804, device='cuda:0', grad_fn=<AddBackward0>)
-----第3轮训练开始-----
tensor(1288.2825, device='cuda:0', grad_fn=<AddBackward0>)
-----第4轮训练开始-----
tensor(1199.8562, device='cuda:0', grad_fn=<AddBackward0>)
-----第5轮训练开始-----
tensor(1141.1815, device='cuda:0', grad_fn=<AddBackward0>)
-----第6轮训练开始-----
tensor(1089.9484, device='cuda:0', grad_fn=<AddBackward0>)
-----第7轮训练开始-----
tensor(1041.3491, device='cuda:0', grad_fn=<AddBackward0>)
-----第8轮训练开始-----
tensor(995.0281, device='cuda:0', grad_fn=<AddBackward0>)
-----第9轮训练开始-----
tensor(950.4661, device='cuda:0', grad_fn=<AddBackward0>)
-----第10轮训练开始-----
tensor(908.3196, device='cuda:0', grad_fn=<AddBackward0>)
-----第11轮训练开始-----
tensor(870.1450, device='cuda:0', grad_fn=<AddBackward0>)
-----第12轮训练开始-----
tensor(836.3250, device='cuda:0', grad_fn=<AddBackward0>)
-----第13轮训练开始-----
tensor(806.3509, device='cuda:0', grad_fn=<AddBackward0>)
-----第14轮训练开始-----
tensor(779.3769, device='cuda:0', grad_fn=<AddBackward0>)
-----第15轮训练开始-----
tensor(754.8840, device='cuda:0', grad_fn=<AddBackward0>)
-----第16轮训练开始-----
tensor(731.9321, device='cuda:0', grad_fn=<AddBackward0>)
-----第17轮训练开始-----
tensor(710.3835, device='cuda:0', grad_fn=<AddBackward0>)
-----第18轮训练开始-----
tensor(690.2420, device='cuda:0', grad_fn=<AddBackward0>)
-----第19轮训练开始-----
tensor(671.0947, device='cuda:0', grad_fn=<AddBackward0>)
-----第20轮训练开始-----
tensor(652.6206, device='cuda:0', grad_fn=<AddBackward0>)

进程已结束,退出代码0

P28 完整的训练套路(二）

28.1 准确率

准确率 = 所有预测对的图片/测试集图片总个数

具体的实现思路需要调用一下argmax函数

argmax（）可以获取输出output中最大概率类别所在的位置

28.1.1 预测正确率代码（argmax使用）

import torch

# Think of this as feeding in two images: for the first image the model outputs
# probability 0.1 for class 0 and probability 0.2 for class 1.
pic_sum = 2
outputs = torch.tensor([[0.1, 0.2],
                        [0.3, 0.4]])
# argmax(1) compares the values within each row (one row per image), whereas
# argmax(0) compares within each column. tensor([1, 1]) means both images are
# predicted as class 1.
predict = outputs.argmax(1)
print(predict)
targets = torch.tensor([0, 1])   # ground truth: first image is class 0, second is class 1
accuracy = (predict == targets).sum()   # number of correct predictions
print(accuracy)
# accuracy = correct predictions / total number of images
print("准确率为：{}".format(accuracy/pic_sum))

28.2 完整的代码：

我的cpu跑不动，借用了一下后面用gpu训练的部分内容

import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import *


# Train on the GPU when one is available, otherwise fall back to the CPU
# instead of failing on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Prepare the datasets and their loaders
dataset_train = torchvision.datasets.CIFAR10("cifar10", train=True, download=True, transform=torchvision.transforms.ToTensor())
dataset_test = torchvision.datasets.CIFAR10("cifar10", train=False, download=True, transform=torchvision.transforms.ToTensor())
dataloader_train = DataLoader(dataset_train, batch_size=64)
dataloader_test = DataLoader(dataset_test, batch_size=64)

# Report the dataset sizes
train_len = len(dataset_train)
test_len = len(dataset_test)
print("训练集的长度为：{}".format(train_len))
print("测试集的长度为：{}".format(test_len))

# Build the model and move it to the chosen device
haha = Haha()
haha = haha.to(device)

# Loss function
loss = nn.CrossEntropyLoss()
loss = loss.to(device)

# Optimizer
optim = torch.optim.SGD(haha.parameters(), lr=1e-2)

# TensorBoard writer
writer = SummaryWriter("logs_train")

# Training configuration
epoch = 20
global_step = 1        # 1-based epoch counter used for the per-epoch scalars
total_train_step = 0   # batches processed across ALL epochs (x-axis of the batch-loss curve)
for i in range(epoch):
    print("-----第{}轮训练开始-----".format(i+1))
    step = 1  # batch counter within this epoch, used only for the console message
    # Training phase
    for data in dataloader_train:
        imgs, targets = data
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = haha(imgs)
        result_loss = loss(outputs, targets)

        # One optimisation step: clear old gradients, back-propagate, update
        optim.zero_grad()
        result_loss.backward()
        optim.step()
        total_train_step += 1

        # Report every 100th batch
        if step % 100 == 0:
            print("训练次数：{}次,损失为：{}，".format(step, result_loss.item()))
            # Log against the cross-epoch counter: `step` restarts every epoch,
            # which would plot all epochs on top of each other.
            writer.add_scalar("train_result_loss", result_loss.item(), total_train_step)
        step += 1

    # Evaluation phase on the test set
    running_loss = 0
    total_accuracy_test = 0
    with torch.no_grad():  # no gradients are needed while evaluating
        for data in dataloader_test:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = haha(imgs)
            result_loss = loss(outputs, targets)  # loss of this batch only (a tensor)
            # Total loss over the whole test set, accumulated as a Python float
            running_loss = running_loss + result_loss.item()
            # Count of correct predictions over the whole test set, as a Python int
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy_test = total_accuracy_test + accuracy.item()

    test_accuracy = total_accuracy_test / test_len
    print("第{}轮 总损失为：{}".format(global_step, running_loss))
    # This is the loss on the TEST set, so it is tagged accordingly
    writer.add_scalar("test_running_loss", running_loss, global_step)
    print("第{}轮 准确率为：{}".format(global_step, test_accuracy))
    # Log the accuracy ratio that is printed above, not the raw correct count
    writer.add_scalar("total_accuracy", test_accuracy, global_step)
    global_step += 1
    # Save the model after every epoch (torch.save returns None, so nothing is bound)
    torch.save(haha, "haha_train_moedl{}.pth".format(i+1))
    print("第{}轮的模型已保存".format(i+1))

writer.close()

输出保存模型在左侧目录显示为：

cuda
Files already downloaded and verified
Files already downloaded and verified
训练集的长度为：50000
测试集的长度为：10000
-----第1轮训练开始-----
训练次数：100次,损失为：2.2931113243103027，
训练次数：200次,损失为：2.284919500350952，
训练次数：300次,损失为：2.2788472175598145，
训练次数：400次,损失为：2.2056031227111816，
训练次数：500次,损失为：2.0810277462005615，
训练次数：600次,损失为：2.0361318588256836，
训练次数：700次,损失为：2.022759437561035，
第1轮 总损失为：317.60839223861694
第1轮 准确率为：0.27410000562667847
第1轮的模型已保存
-----第2轮训练开始-----
训练次数：100次,损失为：2.0008018016815186，
训练次数：200次,损失为：1.9731968641281128，
训练次数：300次,损失为：1.9332154989242554，
训练次数：400次,损失为：1.8408057689666748，
训练次数：500次,损失为：1.8841831684112549，
训练次数：600次,损失为：1.8792617321014404，
训练次数：700次,损失为：1.8609519004821777，
第2轮 总损失为：300.46228551864624
第2轮 准确率为：0.3188000023365021
第2轮的模型已保存
-----第3轮训练开始-----
训练次数：100次,损失为：1.7655715942382812，
训练次数：200次,损失为：1.760992407798767，
训练次数：300次,损失为：1.7241920232772827，
训练次数：400次,损失为：1.6210854053497314，
训练次数：500次,损失为：1.7465519905090332，
训练次数：600次,损失为：1.7013258934020996，
训练次数：700次,损失为：1.7307485342025757，
第3轮 总损失为：267.0808128118515
第3轮 准确率为：0.3837999999523163
第3轮的模型已保存
-----第4轮训练开始-----
训练次数：100次,损失为：1.5298057794570923，
训练次数：200次,损失为：1.604145884513855，
训练次数：300次,损失为：1.5831634998321533，
训练次数：400次,损失为：1.4993634223937988，
训练次数：500次,损失为：1.5525864362716675，
训练次数：600次,损失为：1.5678906440734863，
训练次数：700次,损失为：1.6980340480804443，
第4轮 总损失为：267.1874620914459
第4轮 准确率为：0.3865000009536743
第4轮的模型已保存
-----第5轮训练开始-----
训练次数：100次,损失为：1.3614977598190308，
训练次数：200次,损失为：1.4923309087753296，
训练次数：300次,损失为：1.501603126525879，
训练次数：400次,损失为：1.4064840078353882，
训练次数：500次,损失为：1.4037704467773438，
训练次数：600次,损失为：1.4647917747497559，
训练次数：700次,损失为：1.6546450853347778，
第5轮 总损失为：254.799556016922
第5轮 准确率为：0.4179999828338623
第5轮的模型已保存
-----第6轮训练开始-----
训练次数：100次,损失为：1.2472984790802002，
训练次数：200次,损失为：1.3790236711502075，
训练次数：300次,损失为：1.4547381401062012，
训练次数：400次,损失为：1.3174049854278564，
训练次数：500次,损失为：1.2885082960128784，
训练次数：600次,损失为：1.3783830404281616，
训练次数：700次,损失为：1.5873558521270752，
第6轮 总损失为：241.37652945518494
第6轮 准确率为：0.44200000166893005
第6轮的模型已保存
-----第7轮训练开始-----
训练次数：100次,损失为：1.1562687158584595，
训练次数：200次,损失为：1.2875163555145264，
训练次数：300次,损失为：1.4133583307266235，
训练次数：400次,损失为：1.254225492477417，
训练次数：500次,损失为：1.1896222829818726，
训练次数：600次,损失为：1.3112317323684692，
训练次数：700次,损失为：1.5086911916732788，
第7轮 总损失为：230.23083698749542
第7轮 准确率为：0.4754999876022339
第7轮的模型已保存
-----第8轮训练开始-----
训练次数：100次,损失为：1.086610198020935，
训练次数：200次,损失为：1.2097818851470947，
训练次数：300次,损失为：1.3678902387619019，
训练次数：400次,损失为：1.2088969945907593，
训练次数：500次,损失为：1.1163411140441895，
训练次数：600次,损失为：1.247499942779541，
训练次数：700次,损失为：1.4252394437789917，
第8轮 总损失为：218.35910069942474
第8轮 准确率为：0.5044999718666077
第8轮的模型已保存
-----第9轮训练开始-----
训练次数：100次,损失为：1.037935733795166，
训练次数：200次,损失为：1.141263484954834，
训练次数：300次,损失为：1.3190100193023682，
训练次数：400次,损失为：1.1700167655944824，
训练次数：500次,损失为：1.0646848678588867，
训练次数：600次,损失为：1.1844598054885864，
训练次数：700次,损失为：1.3446803092956543，
第9轮 总损失为：209.00663626194
第9轮 准确率为：0.527999997138977
第9轮的模型已保存
-----第10轮训练开始-----
训练次数：100次,损失为：0.9983921647071838，
训练次数：200次,损失为：1.0837466716766357，
训练次数：300次,损失为：1.2751736640930176，
训练次数：400次,损失为：1.1298927068710327，
训练次数：500次,损失为：1.0229113101959229，
训练次数：600次,损失为：1.1295039653778076，
训练次数：700次,损失为：1.2733806371688843，
第10轮 总损失为：200.50266510248184
第10轮 准确率为：0.5493999719619751
第10轮的模型已保存
-----第11轮训练开始-----
训练次数：100次,损失为：0.966463565826416，
训练次数：200次,损失为：1.0211725234985352，
训练次数：300次,损失为：1.2367210388183594，
训练次数：400次,损失为：1.088995099067688，
训练次数：500次,损失为：0.9786638021469116，
训练次数：600次,损失为：1.0866005420684814，
训练次数：700次,损失为：1.2103657722473145，
第11轮 总损失为：194.7027866244316
第11轮 准确率为：0.5641999840736389
第11轮的模型已保存
-----第12轮训练开始-----
训练次数：100次,损失为：0.9357775449752808，
训练次数：200次,损失为：0.9653837084770203，
训练次数：300次,损失为：1.2010201215744019，
训练次数：400次,损失为：1.0523639917373657，
训练次数：500次,损失为：0.9388719797134399，
训练次数：600次,损失为：1.045914888381958，
训练次数：700次,损失为：1.1654292345046997，
第12轮 总损失为：191.47566080093384
第12轮 准确率为：0.5737999677658081
第12轮的模型已保存
-----第13轮训练开始-----
训练次数：100次,损失为：0.9041438102722168，
训练次数：200次,损失为：0.9161452054977417，
训练次数：300次,损失为：1.1599822044372559，
训练次数：400次,损失为：1.013271450996399，
训练次数：500次,损失为：0.9019820094108582，
训练次数：600次,损失为：1.0087833404541016，
训练次数：700次,损失为：1.125746726989746，
第13轮 总损失为：187.77414095401764
第13轮 准确率为：0.5839999914169312
第13轮的模型已保存
-----第14轮训练开始-----
训练次数：100次,损失为：0.8752296566963196，
训练次数：200次,损失为：0.8772607445716858，
训练次数：300次,损失为：1.1191498041152954，
训练次数：400次,损失为：0.9737100005149841，
训练次数：500次,损失为：0.8703413009643555，
训练次数：600次,损失为：0.9731168746948242，
训练次数：700次,损失为：1.0966637134552002，
第14轮 总损失为：185.168947160244
第14轮 准确率为：0.5892999768257141
第14轮的模型已保存
-----第15轮训练开始-----
训练次数：100次,损失为：0.849646806716919，
训练次数：200次,损失为：0.8409485816955566，
训练次数：300次,损失为：1.0751159191131592，
训练次数：400次,损失为：0.9429139494895935，
训练次数：500次,损失为：0.8454231023788452，
训练次数：600次,损失为：0.937741219997406，
训练次数：700次,损失为：1.0663700103759766，
第15轮 总损失为：182.5419823527336
第15轮 准确率为：0.5976999998092651
第15轮的模型已保存
-----第16轮训练开始-----
训练次数：100次,损失为：0.8328421115875244，
训练次数：200次,损失为：0.8099387288093567，
训练次数：300次,损失为：1.0304874181747437，
训练次数：400次,损失为：0.9107857346534729，
训练次数：500次,损失为：0.8217724561691284，
训练次数：600次,损失为：0.9004014134407043，
训练次数：700次,损失为：1.0346449613571167，
第16轮 总损失为：179.81149476766586
第16轮 准确率为：0.6050999760627747
第16轮的模型已保存
-----第17轮训练开始-----
训练次数：100次,损失为：0.8141376376152039，
训练次数：200次,损失为：0.7860156893730164，
训练次数：300次,损失为：0.9776766300201416，
训练次数：400次,损失为：0.8795670866966248，
训练次数：500次,损失为：0.803113579750061，
训练次数：600次,损失为：0.8628700971603394，
训练次数：700次,损失为：1.0075130462646484，
第17轮 总损失为：177.66287940740585
第17轮 准确率为：0.6111999750137329
第17轮的模型已保存
-----第18轮训练开始-----
训练次数：100次,损失为：0.8004648089408875，
训练次数：200次,损失为：0.7675616145133972，
训练次数：300次,损失为：0.9339349865913391，
训练次数：400次,损失为：0.8534804582595825，
训练次数：500次,损失为：0.7856743931770325，
训练次数：600次,损失为：0.8288229703903198，
训练次数：700次,损失为：0.9895642399787903，
第18轮 总损失为：175.08234637975693
第18轮 准确率为：0.6191999912261963
第18轮的模型已保存
-----第19轮训练开始-----
训练次数：100次,损失为：0.7893921732902527，
训练次数：200次,损失为：0.7497053742408752，
训练次数：300次,损失为：0.8933553099632263，
训练次数：400次,损失为：0.8314535021781921，
训练次数：500次,损失为：0.763375997543335，
训练次数：600次,损失为：0.7902735471725464，
训练次数：700次,损失为：0.9701849818229675，
第19轮 总损失为：173.36062556505203
第19轮 准确率为：0.6258999705314636
第19轮的模型已保存
-----第20轮训练开始-----
训练次数：100次,损失为：0.7721090912818909，
训练次数：200次,损失为：0.7328221797943115，
训练次数：300次,损失为：0.8560763597488403，
训练次数：400次,损失为：0.8105795383453369，
训练次数：500次,损失为：0.7467403411865234，
训练次数：600次,损失为：0.7569105625152588，
训练次数：700次,损失为：0.9446259140968323，
第20轮 总损失为：172.8064525127411
第20轮 准确率为：0.6273999810218811
第20轮的模型已保存

进程已结束,退出代码0

tensorboard:

因为我设置的训练次数在每一轮epoch里都重新从头计数（一直数到700多次），所以从上面可以看出，随着轮数增加，总体的损失是越来越小的。

P29 完整的训练套路（三）

在训练步骤开始之前设置haha.train(),在测试步骤前设置haha.eval()

这两个方法调用的时候就会设置模型进入相应的状态（如训练状态和测试状态），只会对网络层中的特定层起作用，如dropout和batch_norm。起控制某些层的作用，训练时和测试时是不一样的。

dropout：训练时以指定的概率让一部分神经元的输出随机失活（置零），作用是加快训练速度，防止过拟合；测试时不再随机失活。

batch_norm：网络层数太多时，数据的分布会逐渐离散；batch_norm 就是把这些值强行拉回到非线性函数敏感的区域，避免梯度消失，加快训练速度。

就是在训练套路（二）的代码里各加了一行：

P29 用GPU训练（一）

29.1 .cuda()

主要采用.cuda()，根据上面创建的train文件，复制全部代码，只需要做部分改动

可以

加在自己创建的网络模型后面；haha = haha.cuda()
损失函数后面；loss = loss.cuda()
数据后面；imgs = imgs.cuda() targets = targets.cuda()

只要用cuda，就要在前面加一行命令判断一下自己有没有gpu，然后剩下的代码再缩进写：

if torch.cuda.is_available():
    # Only move the model to the GPU when CUDA is actually available
    haha = haha.cuda()  # haha is my own model

29.2 time计时器

可以用time包输出自己训练一轮所需要的时间

import time    # standard-library timing module

start_time = time.time()   # timestamp taken just before the epoch starts
# ... one epoch of training goes here ...
end_time = time.time()     # timestamp taken right after the epoch finishes
print("训练一轮所需时间：{}".format(end_time - start_time))

如果电脑上没有gpu，可以放在google.colab上新建笔记本去跑（需要谷歌账号，每个月可以有30h试用）

29.3 完整代码：

import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import *
import time

# # 使用cuda训练数据
# device = torch.device("cuda")
# print(device)

# 准备数据集
dataset_train = torchvision.datasets.CIFAR10("cifar10",train=True,download=True,transform=torchvision.transforms.ToTensor())
dataset_test = torchvision.datasets.CIFAR10("cifar10",train=False,download=True,transform=torchvision.transforms.ToTensor())
dataloader_train = DataLoader(dataset_train, batch_size=64)
dataloader_test = DataLoader(dataset_test, batch_size=64)

# 查看数据长度
train_len = len(dataset_train)
test_len = len(dataset_test)
print("训练集的长度为：{}".format(train_len))
print("测试集的长度为：{}".format(test_len))

# 创建模型：
class Haha(nn.Module):
    """Small CNN that maps a (N, 3, 32, 32) image batch to 10 class scores.

    Three 5x5 convolutions (padding 2, so the spatial size is preserved),
    each followed by a 2x2 max-pool, shrink 32x32 inputs to 64 feature maps
    of 4x4; two linear layers then reduce the flattened 1024 features to 10
    outputs.
    """

    def __init__(self) -> None:
        super().__init__()
        # The layer order is significant: nn.Sequential names parameters by
        # position (model.0, model.2, ...), which saved models rely on.
        layers = [
            nn.Conv2d(3, 32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(32, 32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(in_features=64 * 4 * 4, out_features=64),
            nn.Linear(in_features=64, out_features=10),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run the batch through the sequential stack and return the scores."""
        return self.model(x)
# Instantiate the network and move it to the GPU when one is available.
haha = Haha()
if torch.cuda.is_available():
    haha = haha.cuda()

# Loss function. It is moved under the same availability check as the model,
# so every .cuda() call in this setup is guarded the same way.
loss = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss = loss.cuda()

# Optimizer: SGD over the model's parameters with learning rate 0.01.
optim = torch.optim.SGD(haha.parameters(), lr=1e-2)

# TensorBoard writer; event files are written to "logs_train".
writer = SummaryWriter("logs_train")

# Training run
# Run configuration:
epoch = 20
global_step = 1
# Check CUDA once: batches are only moved to the GPU when one exists, which
# matches how the model itself is moved and keeps the script usable on CPU.
use_cuda = torch.cuda.is_available()
for i in range(epoch):
    print("-----第{}轮训练开始-----".format(i+1))
    step = 1
    # Training phase
    start_time = time.time()
    haha.train()
    for data in dataloader_train:
        imgs, targets = data
        if use_cuda:
            imgs = imgs.cuda()
            targets = targets.cuda()
        outputs = haha(imgs)
        result_loss = loss(outputs, targets)

        # Optimisation step: clear old gradients, back-propagate, update weights.
        optim.zero_grad()
        result_loss.backward()
        optim.step()

        # Report every 100th batch.
        if step % 100 == 0:
            print("训练次数：{}次,损失为：{}，".format(step, result_loss.item()))   # item() extracts the Python number
            # `step` restarts at 1 every epoch, so plot against the number of
            # batches seen since the start of the run; otherwise every epoch
            # is drawn over the same x range and the curves overwrite each other.
            writer.add_scalar("train_result_loss", result_loss.item(), i * len(dataloader_train) + step)
        step += 1


    # Evaluation phase
    haha.eval()
    running_loss = 0
    total_accuracy_test = 0
    with torch.no_grad():  # no gradients are needed while evaluating
        for data in dataloader_test:
            imgs, targets = data
            if use_cuda:
                imgs = imgs.cuda()
                targets = targets.cuda()
            outputs = haha(imgs)
            result_loss = loss(outputs, targets)  # loss of this batch only, as a tensor
            # Accumulate the loss over the whole test set as a plain number.
            running_loss = running_loss + result_loss.item()
            # Count the correctly classified samples of this batch.
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy_test = total_accuracy_test + accuracy

    # Fraction of correctly classified test samples; used for both the console
    # and TensorBoard so the two always agree.
    accuracy_ratio = float(total_accuracy_test / test_len)
    print("第{}轮 总损失为：{}".format(global_step, running_loss))
    # NOTE: despite the tag name, this value is the summed loss over the test set.
    writer.add_scalar("train_running_loss", running_loss, global_step)
    print("第{}轮 准确率为：{}".format(global_step, accuracy_ratio))
    writer.add_scalar("total_accuracy", accuracy_ratio, global_step)
    # Per-epoch checkpointing is disabled in this script (the call is commented
    # out), so the message below is printed without a file being written.
    # model = torch.save(haha,"haha_train_moedl{}.pth".format(i+1))
    print("第{}轮的模型已保存".format(i+1))
    # The measured interval covers both the training and the evaluation phase.
    endtime = time.time()
    print("训练第{}轮的时间为：{}".format(global_step,endtime - start_time))
    global_step += 1

writer.close()

输出😊

E:\anaconda\envs\py38\python.exe E:\pycharm\project\project_test\test_0320\train1_gpu.py 
Files already downloaded and verified
Files already downloaded and verified
训练集的长度为：50000
测试集的长度为：10000
-----第1轮训练开始-----
训练次数：100次,损失为：2.2905967235565186，
训练次数：200次,损失为：2.2821202278137207，
训练次数：300次,损失为：2.245570659637451，
训练次数：400次,损失为：2.1656620502471924，
训练次数：500次,损失为：2.033987283706665，
训练次数：600次,损失为：1.9986780881881714，
训练次数：700次,损失为：1.9924761056900024，
第1轮 总损失为：312.432296872139
第1轮 准确率为：0.2895999848842621
第1轮的模型已保存
训练第1轮的时间为：24.783493995666504
-----第2轮训练开始-----
训练次数：100次,损失为：1.937050700187683，
训练次数：200次,损失为：1.9323559999465942，
训练次数：300次,损失为：1.8719874620437622，
训练次数：400次,损失为：1.773117184638977，
训练次数：500次,损失为：1.8087986707687378，
训练次数：600次,损失为：1.8122302293777466，
训练次数：700次,损失为：1.8081324100494385，
第2轮 总损失为：297.780638217926
第2轮 准确率为：0.3287999927997589
第2轮的模型已保存
训练第2轮的时间为：9.272233247756958
-----第3轮训练开始-----
训练次数：100次,损失为：1.6745221614837646，
训练次数：200次,损失为：1.6618083715438843，
训练次数：300次,损失为：1.6877068281173706，
训练次数：400次,损失为：1.5887961387634277，
训练次数：500次,损失为：1.6719287633895874，
训练次数：600次,损失为：1.6717188358306885，
训练次数：700次,损失为：1.7274123430252075，
第3轮 总损失为：259.36444902420044
第3轮 准确率为：0.4068000018596649
第3轮的模型已保存
训练第3轮的时间为：9.027472496032715
-----第4轮训练开始-----
训练次数：100次,损失为：1.485672950744629，
训练次数：200次,损失为：1.5527979135513306，
训练次数：300次,损失为：1.5899217128753662，
训练次数：400次,损失为：1.4958852529525757，
训练次数：500次,损失为：1.5291996002197266，
训练次数：600次,损失为：1.570622205734253，
训练次数：700次,损失为：1.6631007194519043，
第4轮 总损失为：254.18749344348907
第4轮 准确率为：0.4171999990940094
第4轮的模型已保存
训练第4轮的时间为：9.231419324874878
-----第5轮训练开始-----
训练次数：100次,损失为：1.3498332500457764，
训练次数：200次,损失为：1.4666500091552734，
训练次数：300次,损失为：1.5175186395645142，
训练次数：400次,损失为：1.4253044128417969，
训练次数：500次,损失为：1.4136509895324707，
训练次数：600次,损失为：1.4621104001998901，
训练次数：700次,损失为：1.6403976678848267，
第5轮 总损失为：251.01690125465393
第5轮 准确率为：0.42719998955726624
第5轮的模型已保存
训练第5轮的时间为：9.161771059036255
-----第6轮训练开始-----
训练次数：100次,损失为：1.2514092922210693，
训练次数：200次,损失为：1.3725334405899048，
训练次数：300次,损失为：1.4507195949554443，
训练次数：400次,损失为：1.3392728567123413，
训练次数：500次,损失为：1.319530725479126，
训练次数：600次,损失为：1.3608940839767456，
训练次数：700次,损失为：1.6041128635406494，
第6轮 总损失为：243.93001222610474
第6轮 准确率为：0.44369998574256897
第6轮的模型已保存
训练第6轮的时间为：9.073267221450806
-----第7轮训练开始-----
训练次数：100次,损失为：1.1704820394515991，
训练次数：200次,损失为：1.2850078344345093，
训练次数：300次,损失为：1.3898333311080933，
训练次数：400次,损失为：1.2684869766235352，
训练次数：500次,损失为：1.231413722038269，
训练次数：600次,损失为：1.2794348001480103，
训练次数：700次,损失为：1.524946928024292，
第7轮 总损失为：231.0968475341797
第7轮 准确率为：0.47360000014305115
第7轮的模型已保存
训练第7轮的时间为：9.101604700088501
-----第8轮训练开始-----
训练次数：100次,损失为：1.1044495105743408，
训练次数：200次,损失为：1.204076886177063，
训练次数：300次,损失为：1.3326046466827393，
训练次数：400次,损失为：1.2104219198226929，
训练次数：500次,损失为：1.1481982469558716，
训练次数：600次,损失为：1.2094660997390747，
训练次数：700次,损失为：1.4364663362503052，
第8轮 总损失为：218.9771716594696
第8轮 准确率为：0.5047000050544739
第8轮的模型已保存
训练第8轮的时间为：8.950648784637451
-----第9轮训练开始-----
训练次数：100次,损失为：1.0550774335861206，
训练次数：200次,损失为：1.1339002847671509，
训练次数：300次,损失为：1.28956139087677，
训练次数：400次,损失为：1.163724422454834，
训练次数：500次,损失为：1.0837416648864746，
训练次数：600次,损失为：1.151116132736206，
训练次数：700次,损失为：1.3548895120620728，
第9轮 总损失为：210.46681082248688
第9轮 准确率为：0.5266000032424927
第9轮的模型已保存
训练第9轮的时间为：8.987959861755371
-----第10轮训练开始-----
训练次数：100次,损失为：1.0218291282653809，
训练次数：200次,损失为：1.0866893529891968，
训练次数：300次,损失为：1.2467507123947144，
训练次数：400次,损失为：1.1070494651794434，
训练次数：500次,损失为：1.0290584564208984，
训练次数：600次,损失为：1.108553171157837，
训练次数：700次,损失为：1.2934924364089966，
第10轮 总损失为：203.53999710083008
第10轮 准确率为：0.5410999655723572
第10轮的模型已保存
训练第10轮的时间为：8.914040565490723
-----第11轮训练开始-----
训练次数：100次,损失为：0.9902496337890625，
训练次数：200次,损失为：1.0509400367736816，
训练次数：300次,损失为：1.2013808488845825，
训练次数：400次,损失为：1.0529910326004028，
训练次数：500次,损失为：0.9824795722961426，
训练次数：600次,损失为：1.0781936645507812，
训练次数：700次,损失为：1.2365015745162964，
第11轮 总损失为：197.5236524939537
第11轮 准确率为：0.5551999807357788
第11轮的模型已保存
训练第11轮的时间为：9.488547325134277
-----第12轮训练开始-----
训练次数：100次,损失为：0.9615802764892578，
训练次数：200次,损失为：1.0197076797485352，
训练次数：300次,损失为：1.155418872833252，
训练次数：400次,损失为：0.9988523125648499，
训练次数：500次,损失为：0.9423302412033081，
训练次数：600次,损失为：1.0484671592712402，
训练次数：700次,损失为：1.1914249658584595，
第12轮 总损失为：191.97088432312012
第12轮 准确率为：0.5659999847412109
第12轮的模型已保存
训练第12轮的时间为：9.273144483566284
-----第13轮训练开始-----
训练次数：100次,损失为：0.9293488264083862，
训练次数：200次,损失为：0.9909013509750366，
训练次数：300次,损失为：1.105408787727356，
训练次数：400次,损失为：0.9607524275779724，
训练次数：500次,损失为：0.907731294631958，
训练次数：600次,损失为：1.0210427045822144，
训练次数：700次,损失为：1.1465009450912476，
第13轮 总损失为：187.52046293020248
第13轮 准确率为：0.5787999629974365
第13轮的模型已保存
训练第13轮的时间为：9.238065958023071
-----第14轮训练开始-----
训练次数：100次,损失为：0.9020389914512634，
训练次数：200次,损失为：0.955945611000061，
训练次数：300次,损失为：1.055712342262268，
训练次数：400次,损失为：0.9302864074707031，
训练次数：500次,损失为：0.8751024007797241，
训练次数：600次,损失为：0.9931136965751648，
训练次数：700次,损失为：1.1019436120986938，
第14轮 总损失为：183.5526888370514
第14轮 准确率为：0.5888000130653381
第14轮的模型已保存
训练第14轮的时间为：9.22055459022522
-----第15轮训练开始-----
训练次数：100次,损失为：0.8797929883003235，
训练次数：200次,损失为：0.9289506077766418，
训练次数：300次,损失为：1.0101605653762817，
训练次数：400次,损失为：0.9003520607948303，
训练次数：500次,损失为：0.8515262603759766，
训练次数：600次,损失为：0.9700659513473511，
训练次数：700次,损失为：1.0617284774780273，
第15轮 总损失为：180.8592730164528
第15轮 准确率为：0.5974000096321106
第15轮的模型已保存
训练第15轮的时间为：9.062849283218384
-----第16轮训练开始-----
训练次数：100次,损失为：0.8527998328208923，
训练次数：200次,损失为：0.9124150276184082，
训练次数：300次,损失为：0.9704921245574951，
训练次数：400次,损失为：0.8786951303482056，
训练次数：500次,损失为：0.8221790194511414，
训练次数：600次,损失为：0.945232093334198，
训练次数：700次,损失为：1.027095913887024，
第16轮 总损失为：178.32667046785355
第16轮 准确率为：0.6040999889373779
第16轮的模型已保存
训练第16轮的时间为：9.254530668258667
-----第17轮训练开始-----
训练次数：100次,损失为：0.8207857608795166，
训练次数：200次,损失为：0.8898065090179443，
训练次数：300次,损失为：0.9304372668266296，
训练次数：400次,损失为：0.862062394618988，
训练次数：500次,损失为：0.7994813323020935，
训练次数：600次,损失为：0.9187434315681458，
训练次数：700次,损失为：0.9988839626312256，
第17轮 总损失为：175.52347177267075
第17轮 准确率为：0.613099992275238
第17轮的模型已保存
训练第17轮的时间为：9.135975360870361
-----第18轮训练开始-----
训练次数：100次,损失为：0.7884131669998169，
训练次数：200次,损失为：0.8652814626693726，
训练次数：300次,损失为：0.8977620005607605，
训练次数：400次,损失为：0.8430650234222412，
训练次数：500次,损失为：0.7812496423721313，
训练次数：600次,损失为：0.896126389503479，
训练次数：700次,损失为：0.9740239381790161，
第18轮 总损失为：172.546732544899
第18轮 准确率为：0.6200000047683716
第18轮的模型已保存
训练第18轮的时间为：9.062570333480835
-----第19轮训练开始-----
训练次数：100次,损失为：0.7588167786598206，
训练次数：200次,损失为：0.8417664766311646，
训练次数：300次,损失为：0.865825891494751，
训练次数：400次,损失为：0.8266186118125916，
训练次数：500次,损失为：0.7649041414260864，
训练次数：600次,损失为：0.8651140332221985，
训练次数：700次,损失为：0.9490988850593567，
第19轮 总损失为：170.14829176664352
第19轮 准确率为：0.6286999583244324
第19轮的模型已保存
训练第19轮的时间为：9.376101016998291
-----第20轮训练开始-----
训练次数：100次,损失为：0.7320053577423096，
训练次数：200次,损失为：0.8210268020629883，
训练次数：300次,损失为：0.8383702635765076，
训练次数：400次,损失为：0.8133151531219482，
训练次数：500次,损失为：0.7523077130317688，
训练次数：600次,损失为：0.8363757133483887，
训练次数：700次,损失为：0.9279225468635559，
第20轮 总损失为：168.0762687921524
第20轮 准确率为：0.6351000070571899
第20轮的模型已保存
训练第20轮的时间为：8.956818103790283

进程已结束,退出代码0

P30 用gpu训练（二）

就是用更规范的语言表述，这种写法也更常用一点，前面的train代码因为电脑不行，已经加进去了

import torch

# Four ways of naming a target device.
device = torch.device('cpu')       # train on the CPU
device1 = torch.device('cuda')     # with a single GPU, device1 and device2 name the same card
device2 = torch.device('cuda:0')
device3 = torch.device('cuda:1')   # train on the second GPU

# One device per output line.
print(device, device1, device2, device3, sep="\n")

通常会在前面加这句，意思是如果cuda可以用，就用cuda跑，如果不能用就用cpu

这样可以避免有时候不知道自己电脑有没有gpu，运行报错的问题

import torch

# Use CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

下面完整的代码和train部分的一样，只是修改了模型、数据、损失函数

需要注意的是：只有数据（图片、标注）必须把.to(device)的结果重新赋值给变量，模型和损失函数只需要调用.to(device)就行了

但是为了方便记忆，会采用同样的处理方式，就是都赋值。

完整代码：

import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import *


# Select the training device: CUDA when it is available, otherwise the CPU,
# so the script also runs on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Prepare the data: CIFAR10 train/test splits stored under "cifar10"
# (downloaded on demand), with every image converted to a tensor.
to_tensor = torchvision.transforms.ToTensor()
dataset_train = torchvision.datasets.CIFAR10(
    "cifar10", train=True, download=True, transform=to_tensor
)
dataset_test = torchvision.datasets.CIFAR10(
    "cifar10", train=False, download=True, transform=to_tensor
)

# Batch both splits, 64 samples per batch.
dataloader_train = DataLoader(dataset_train, batch_size=64)
dataloader_test = DataLoader(dataset_test, batch_size=64)

# Report how many samples each split contains.
train_len, test_len = len(dataset_train), len(dataset_test)
print("训练集的长度为：{}".format(train_len))
print("测试集的长度为：{}".format(test_len))

# Build the model and place it on the selected device.
# (Haha is not defined in this script; presumably it comes from
# `from model import *` — confirm against model.py.)
haha = Haha().to(device)

# Loss function, placed on the same device.
loss = nn.CrossEntropyLoss().to(device)

# Optimizer: SGD over the model's parameters with learning rate 0.01.
optim = torch.optim.SGD(params=haha.parameters(), lr=1e-2)

# TensorBoard writer; event files are written to "logs_train".
writer = SummaryWriter(log_dir="logs_train")

# Training run
# Run configuration:
epoch = 20
global_step = 1
for i in range(epoch):
    print("-----第{}轮训练开始-----".format(i+1))
    step = 1
    # Training phase
    haha.train()
    for data in dataloader_train:
        imgs, targets = data
        # Tensor.to() returns the moved tensor, so the result must be re-assigned.
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = haha(imgs)
        result_loss = loss(outputs, targets)

        # Optimisation step: clear old gradients, back-propagate, update weights.
        optim.zero_grad()
        result_loss.backward()
        optim.step()

        # Report every 100th batch.
        if step % 100 == 0:
            print("训练次数：{}次,损失为：{}，".format(step, result_loss.item()))   # item() extracts the Python number
            # `step` restarts at 1 every epoch, so plot against the number of
            # batches seen since the start of the run; otherwise every epoch
            # is drawn over the same x range and the curves overwrite each other.
            writer.add_scalar("train_result_loss", result_loss.item(), i * len(dataloader_train) + step)
        step += 1

    # Evaluation phase
    haha.eval()
    running_loss = 0
    total_accuracy_test = 0
    with torch.no_grad():  # no gradients are needed while evaluating
        for data in dataloader_test:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = haha(imgs)
            result_loss = loss(outputs, targets)  # loss of this batch only, as a tensor
            # Accumulate the loss over the whole test set as a plain number.
            running_loss = running_loss + result_loss.item()
            # Count the correctly classified samples of this batch.
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy_test = total_accuracy_test + accuracy

    # Fraction of correctly classified test samples; used for both the console
    # and TensorBoard so the two always agree.
    accuracy_ratio = float(total_accuracy_test / test_len)
    print("第{}轮 总损失为：{}".format(global_step, running_loss))
    # NOTE: despite the tag name, this value is the summed loss over the test set.
    writer.add_scalar("train_running_loss", running_loss, global_step)
    print("第{}轮 准确率为：{}".format(global_step, accuracy_ratio))
    writer.add_scalar("total_accuracy", accuracy_ratio, global_step)
    global_step += 1
    # Save the whole model after every epoch. torch.save returns nothing, so
    # its result is not bound to a name. The file name is kept unchanged
    # because the validation script loads the checkpoint by this exact name.
    torch.save(haha,"haha_train_moedl{}.pth".format(i+1))
    print("第{}轮的模型已保存".format(i+1))

writer.close()

输出和train一样，就不放了。

P31 完整的验证套路

31.1 完整的验证步骤：

选择相应的图片，放在文件夹里（注意相对引用的使用写法）
resize图片，转为tensor数据类型 torchvision.transforms.Resize()
复制自己的model文件 Haha()
解析保存的文件 torch.load()
验证模型得到output 和target

31.2 注意点：

转为验证模式（model.eval()），避免dropout层和batch_norm层带来影响
关闭梯度计算（with torch.no_grad()），节约性能

map_location=torch.device('cpu')     把在gpu上保存的模型映射到cpu上加载，避免在没有gpu的电脑上加载时报错

图片是三维的，而网络的输入需要batch_size这一维，所以要用reshape（）变成四维（batch_size设为1）

31.3 完整的代码

import torch
import torchvision
from PIL import Image
from torch import nn

# Load the image to classify.
img_path = "imgs/img.png"
img = Image.open(img_path)
print(img)

# Force three colour channels: depending on how the picture was captured it
# may carry a different number of channels (e.g. an extra alpha channel).
image = img.convert('RGB')

# Resize to 32x32 (the input size used for CIFAR10 here) and convert to a tensor.
transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)), torchvision.transforms.ToTensor()])
# Transform the RGB-converted image, not the raw one; otherwise the channel
# conversion above has no effect and a 4-channel picture cannot be reshaped
# to [1, 3, 32, 32] later on.
img_transform = transform(image)
print(img_transform.shape)       # torch.Size([3, 32, 32])

# 复制自己写好的model
class Haha(nn.Module):
    """Small CNN that maps a (N, 3, 32, 32) image batch to 10 class scores.

    Three 5x5 convolutions (padding 2, so the spatial size is preserved),
    each followed by a 2x2 max-pool, shrink 32x32 inputs to 64 feature maps
    of 4x4; two linear layers then reduce the flattened 1024 features to 10
    outputs.
    """

    def __init__(self) -> None:
        super().__init__()
        # The layer order is significant: nn.Sequential names parameters by
        # position (model.0, model.2, ...), which saved models rely on.
        layers = [
            nn.Conv2d(3, 32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(32, 32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(in_features=1024, out_features=64),
            nn.Linear(in_features=64, out_features=10),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run the batch through the sequential stack and return the scores."""
        return self.model(x)
haha = Haha()

# Load the saved model file. map_location maps the stored tensors onto the
# CPU, so a checkpoint written on a GPU machine can be loaded on a CPU one.
# NOTE(review): recent PyTorch releases reportedly default torch.load to
# weights_only=True, which rejects whole-model files — confirm against the
# installed version.
model = torch.load("save_model/haha_train_moedl20.pth", map_location=torch.device('cpu'))
# Switch to evaluation mode so dropout / batch-norm layers do not affect the result.
model.eval()

# The transformed image is 3-D (torch.Size([3, 32, 32])); the network expects
# a leading batch dimension, so reshape it into a batch of one.
img = img_transform.reshape(1, 3, 32, 32)

# Run the prediction without tracking gradients, which saves work.
with torch.no_grad():
    output = model(img)
print(output)


# Index of the highest score = predicted class.
test_target = output.argmax(dim=1)
print(test_target)

可以看出，训练20轮的模型，预测出来我们随机选择的图片是cifar10的第6个类别。通过给train.py打断点可以看出cifar10的类别：

可以看出他预测的是错的，因为我们的模型准确率只有60%+，所以会预测出错，可以多选一些图片，多训练几轮再次测试。

P32 如何看开源项目

打开github，左上角搜索pytorch，就可以看到许多开源项目，选择自己喜欢的去看

看read me，里面有很多安装方法，注意事项等等（可能很多代码看不懂在这里会找到答案）
打开train文件，和我们之前自己写的差不多就是参数那块可能复杂一点,但是一般会有写help，有一些相关的解释。然后如果找不到相应的参数，可以点击他的父类里去找。

train a model

这些是训练集的参数，--是参数名称，后面的圆圈对应参数，具体代码如下：

参数里有required=True的就删掉，替换为它的默认值（default）。

哈哈不爱学习

关注

21
点赞
踩
19

收藏

觉得还不错? 一键收藏
1
评论
[Pytorch入门] 土堆课程笔记

创建dataset数据集，分为训练集和测试集；用dataloader进行数据加载；用len（）查看数据集长度，比如cifar10 训练集和测试集各有多少张图片；自己写的模型model：model文件和train文件要在同一个目录下，from model import *；写损失函数 loss；选择合适的优化器；设置网络训练参数；调用haha.train()让网络进入训练状态（传数据，过模型得到output，loss，梯度置零，反向传播，打印~）
复制链接

扫一扫