Deep Learning with PyTorch

import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Creating tensors
n = torch.Tensor(1,2,3)  # shape (1,2,3), uninitialized memory
print(n)
n1 = torch.Tensor([1,2,3])
print(n1)
n2 = torch.FloatTensor([1,2,3])
print(n2)
n3 = torch.IntTensor([1,2,3])
print(n3)
n4 = torch.zeros([1,2])
print(n4)
n5 = torch.ones([1,2])
print(n5)
n6 = torch.eye(3)
print(n6)
n7 = torch.randn([2,3])
print(n7)
n8 = torch.randint(0,5,(1,2))
print(n8)
n9 = torch.empty([1,2])
print(n9)
tensor([[[1.3584e+16, 2.0879e-42, 0.0000e+00],
         [2.0000e+00, 0.0000e+00, 2.1250e+00]]])
tensor([1., 2., 3.])
tensor([1., 2., 3.])
tensor([1, 2, 3], dtype=torch.int32)
tensor([[0., 0.]])
tensor([[1., 1.]])
tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])
tensor([[ 0.5922, -1.0198,  0.1753],
        [-0.1031, -0.1776, -0.6336]])
tensor([[2, 3]])
tensor([[0.0000, 4.4766]])
# Changing dimensions
# view shares memory with the original data: if the original changes, the viewed data changes with it
n10 = n.view(6)
print(n10)
n += 1
print(n10)
# reshape can also change the shape, but it does not guarantee whether it returns a copy or a view, so it is not recommended
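# A quick sketch of the difference: reshape on a contiguous tensor returns a view,
# while reshape on a non-contiguous tensor (e.g. after a transpose) returns a copy
a = torch.arange(6)
b = a.reshape(2, 3)   # shares memory with a
a += 1
print(b)              # reflects the in-place change to a
c = torch.arange(6).view(2, 3).t().reshape(6)  # non-contiguous source: returns a copy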

# Officially recommended: first create a copy with clone(), then change the shape with view()
# clone() is recorded in the computation graph, so gradients still flow back to the source tensor
n1 = n.clone()
n1 = n1.view(6)
n += 1
print(n1)
tensor([1.3584e+16, 8.0000e+00, 8.0000e+00, 1.0000e+01, 8.0000e+00, 1.0125e+01])
tensor([1.3584e+16, 9.0000e+00, 9.0000e+00, 1.1000e+01, 9.0000e+00, 1.1125e+01])
tensor([1.3584e+16, 9.0000e+00, 9.0000e+00, 1.1000e+01, 9.0000e+00, 1.1125e+01])
# Slicing and indexing work like numpy
# Convert a tensor to numpy
print(n.numpy())
# Convert to a Python list
print(n.tolist())
# Convert numpy data to a tensor
n = np.array([1,1,2,3])
n1 = torch.from_numpy(n)
print(n1)
[[[1.3583538e+16 1.0000000e+01 1.0000000e+01]
  [1.2000000e+01 1.0000000e+01 1.2125000e+01]]]
[[[1.3583538448236544e+16, 10.0, 10.0], [12.0, 10.0, 12.125]]]
tensor([1, 1, 2, 3], dtype=torch.int32)
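
# Note (a minimal sketch): torch.from_numpy shares memory with the numpy array,
# so an in-place change on the numpy side is visible in the tensor
n[0] = 9
print(n1)             # tensor([9, 1, 2, 3], ...) -- reflects the numpy change
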
# Autograd
# With requires_grad set to True, the tensor tracks all operations on it; calling backward() computes all gradients automatically and accumulates them into the grad attribute
# When calling y.backward(): if y is a scalar, backward() needs no arguments; otherwise, pass a tensor with the same shape as y
# Call .detach() to separate a tensor from its computation history and stop tracking
# Every tensor has a .grad_fn attribute referencing the Function that created it (unless the tensor was created directly by the user, in which case grad_fn is None)
x = torch.randn(2,3)
# print((1,1)+x.shape)
print(x.requires_grad)
# x.requires_grad = True
x.requires_grad_(True)  # 等价于x.requires_grad = True
print(x.requires_grad)
y = 2 * x.pow(2) + 5
# y.backward(torch.ones_like(x))
# print(x.grad_fn)
# print(x.grad)
# print(x.grad.data)
z = y.pow(3).sum()
print(z.grad_fn)
z.backward()
print(x.grad)

# Gradient == derivative (differential)
# Note: grad is accumulated during backpropagation, i.e. each backward pass adds to the previous gradients, so gradients are usually zeroed before each backward pass
x.grad.data.zero_() 
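# A minimal sketch of the accumulation behaviour: two backward passes without
# zeroing in between double the stored gradient
t = torch.ones(2, requires_grad=True)
(t * 3).sum().backward()
print(t.grad)   # tensor([3., 3.])
(t * 3).sum().backward()
print(t.grad)   # tensor([6., 6.]) -- accumulated, not overwritten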

# Stop tracking
with torch.no_grad():
    print(x.pow(2).requires_grad)

# Modifying only .data changes the values without being recorded by autograd
x = torch.rand(size=(2,2), requires_grad=True)
y = x.pow(2)
print(y.grad_fn)
y.backward(torch.ones_like(x))
print(x.grad.data)
x.data *=2
print(x.grad)

False
True
<SumBackward0 object at 0x000001BE8CDD58B0>
tensor([[ 220.7805,  294.1247, -272.5137],
        [ 895.0473,  -30.8659,  -96.8087]])
False
<PowBackward0 object at 0x000001BE8C698910>
tensor([[1.4667, 0.3192],
        [0.1939, 0.5397]])
tensor([[1.4667, 0.3192],
        [0.1939, 0.5397]])
# Operations
print(n5+n4)
print(n5-n4)
print(n5 * n4)
print(n5*3)
print(n5**2)
print(n5.abs())
print(n5.sum())
# Element-wise clamping: clips the input to a user-defined range and returns the result.
# It takes three arguments: the tensor to clamp, the lower bound, and the upper bound.
# Each element is compared against both bounds: values below the lower bound are rewritten
# to the lower bound, and values above the upper bound are rewritten to the upper bound.
print(torch.clamp(n5,-0.1,0.1))
# Matrix multiplication: (n×m) times (m×k) yields (n×k)
print(torch.mm(n5,n7))
# Matrix-vector product: the first argument is a matrix, the second a vector
print(torch.mv(n5, n4.reshape(-1,)))
# Transpose
print(n5.T)
print(n5.t())
tensor([[1., 1.]])
tensor([[1., 1.]])
tensor([[0., 0.]])
tensor([[3., 3.]])
tensor([[1., 1.]])
tensor([[1., 1.]])
tensor(2.)
tensor([[0.1000, 0.1000]])
tensor([[ 0.4891, -1.1974, -0.4583]])
tensor([0.])
tensor([[1.],
        [1.]])
tensor([[1.],
        [1.]])
# The torch.nn module
# A simple neural network: input 1000, hidden layer 100, output 10
batch = 100
input_n = 1000
hid = 100 
output = 10
eta = 0.001
x = torch.randn(batch,input_n)
# hid_layer = torch.randn(input_n, hid)
y = torch.randn(batch,output)

w1 = torch.randn(input_n, hid)
w2 = torch.randn(hid,output)
# Writing the gradient formulas by hand: the ReLU is applied in the forward pass
# and its derivative is masked in the backward pass
epochs = 100
for i in range(epochs):
    # forward
    h1 = x.mm(w1)                          # shape (batch, hid)
    h1_relu = h1.clamp(min=0)              # ReLU
    y_pred = h1_relu.mm(w2)                # shape (batch, output)
    loss = (y_pred - y).pow(2).sum()
    # backward (chain rule, by hand)
    grad_y_pred = 2 * (y_pred - y)         # shape (batch, output)
    grad_w2 = h1_relu.t().mm(grad_y_pred)  # shape (hid, output)
    grad_h1 = grad_y_pred.mm(w2.t()).clone()  # shape (batch, hid)
    grad_h1[h1 < 0] = 0                    # ReLU derivative mask
    grad_w1 = x.t().mm(grad_h1)            # shape (input_n, hid)
    w1 -= eta * grad_w1
    w2 -= eta * grad_w2
# Using torch.autograd for automatic differentiation
# Note: Variable has been deprecated since PyTorch 0.4; a plain tensor with requires_grad=True behaves the same
from torch.autograd import Variable
x = Variable(torch.randn(batch,input_n),requires_grad=False)
y = Variable(torch.randn(batch,output),requires_grad=False)
w1 = Variable(torch.randn(input_n,hid), requires_grad=True)
w2 = Variable(torch.randn(hid,output), requires_grad=True)
for i in range(epochs):
    h = x.mm(w1)
    y_p = h.mm(w2)
    loss = (y - y_p).pow(2).sum()
    # backpropagation
    loss.backward()
    w1.data -= eta*w1.grad.data
    w2.data -= eta*w2.grad.data
    w1.grad.zero_()
    w2.grad.zero_()
    # print(w1)
    # print(w2)
    # print(loss)
# LeNet, a feed-forward convolutional network
# Input: 32x32 images
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        # 6 convolution kernels of size 5, stride 1, padding 0
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5,padding=0,stride=1)
        self.conv2 = nn.Conv2d(6,16,5)
        # fully connected layers
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)),2)
        x = F.max_pool2d(F.relu(self.conv2(x)),2)
        x = x.view(-1, self.num_flat_features(x))  # flatten all dims except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self,x):
        size = x.size()[1:]
        num = 1
        for i in size:
            num *=i
        return num
        
# The AlexNet network
import torch.nn as nn
import torch.nn.functional as F

class Alexnet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3,96,11,4),
            nn.ReLU(),
            nn.MaxPool2d(3,2),
            nn.Conv2d(96,256,5,1,2),
            nn.ReLU(),
            nn.MaxPool2d(3,2),
            nn.Conv2d(256,384,3,1,1),
            nn.ReLU(),
            nn.Conv2d(384,384,3,1,1),
            nn.ReLU(),
            nn.Conv2d(384,256,3,1,1),
            nn.ReLU(),
            nn.MaxPool2d(3,2)
        )
        self.fc = nn.Sequential(
            nn.Linear(256 *6*6, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096,4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 10)
        )
    def forward(self, x):
        feature = self.conv(x)
        output = self.fc(feature)
        return output
            
        
# Convolution
# torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros')
# dilation: whether to use dilated (atrous) convolution; the default is 1 (no dilation). It is the spacing between adjacent kernel elements, so the default is 1, since two distinct elements cannot occupy the same position (which would be 0)
# groups: whether to use grouped convolution; a common choice today is groups = in_channels (depthwise convolution)
# bias: whether to add a bias term as a learnable parameter
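# A quick shape check (illustrative): dilation enlarges the effective kernel to
# dilation*(kernel_size-1)+1, and groups splits the channels into independent groups
x_demo = torch.randn(1, 4, 32, 32)
conv_demo = nn.Conv2d(4, 8, kernel_size=3, dilation=2, groups=2)
print(conv_demo(x_demo).shape)  # torch.Size([1, 8, 28, 28]): effective kernel 5, 32-5+1=28
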
# Transposed convolution (deconvolution)
# torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1)
# in_channels (int) – number of input channels
# out_channels (int) – number of output channels
# kernel_size (int or tuple) – size of the convolution kernel
# stride (int or tuple, optional) – convolution stride, i.e. the factor by which the input is enlarged
# padding (int or tuple, optional) – implicit zero-padding on the input; output height and width each shrink by 2*padding
# output_padding (int or tuple, optional) – extra size added to one side of the output; height and width each grow by output_padding
# groups (int, optional) – number of blocked connections from input channels to output channels
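# Illustrative sketch: the output size is (in-1)*stride - 2*padding + kernel_size + output_padding
x_demo = torch.randn(1, 8, 16, 16)
deconv_demo = nn.ConvTranspose2d(8, 4, kernel_size=2, stride=2)
print(deconv_demo(x_demo).shape)  # torch.Size([1, 4, 32, 32]): (16-1)*2 + 2 = 32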

# Datasets
# DataLoader(dataset, batch_size, shuffle, sampler, batch_sampler, num_workers, collate_fn, drop_last, ...)
# shuffle: whether to reshuffle the data at the start of every epoch. Default: False
# sampler (optional): strategy for drawing samples from the dataset
# collate_fn (optional): how to merge individual samples into a batch
# drop_last (optional): whether to drop the last incomplete batch when the dataset size is not divisible by the batch size. Default: False
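# A minimal DataLoader sketch on a toy TensorDataset (illustrative names)
from torch.utils.data import TensorDataset, DataLoader
ds_demo = TensorDataset(torch.randn(10, 3), torch.arange(10))
dl_demo = DataLoader(ds_demo, batch_size=4, shuffle=True, drop_last=True)
for xb, yb in dl_demo:
    print(xb.shape, yb.shape)  # two batches of 4; the last 2 samples are dropped
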
# Upsampling
# torch.nn.Upsample(size=None, scale_factor=None, mode='nearest', align_corners=None)
# size: target output size
# scale_factor: multiplier for the input size
# mode: the upsampling algorithm: nearest, linear, bilinear, bicubic or trilinear. Default: nearest
# align_corners: if True, the corner pixels of the input are aligned with the output tensor, so their values are preserved
n = torch.Tensor(1,3,28,28)
up = torch.nn.Upsample(scale_factor=2)
n = up(n)
print(n.size())
torch.Size([1, 3, 56, 56])
# Batch normalization
# torch.nn.BatchNorm2d(num_features, eps, momentum, affine, track_running_stats)
# num_features: number of channels C of the input image
# eps: stability constant that keeps the denominator from reaching 0
# momentum: update factor for the stored running mean (running_mean) and variance (running_var)
# affine: whether gamma and beta are learnable. If True, both are learned; if False, they are fixed to the defaults gamma=1 and beta=0
# track_running_stats: whether the stored mean and variance are updated; if True they are updated (see the momentum parameter), otherwise they are frozen
# Formula: y = (x - mean(x)) / sqrt(var(x) + eps) * gamma + beta
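# A sketch verifying the formula against nn.BatchNorm2d in training mode
# (gamma=1, beta=0 at initialization, so the affine part drops out)
bn_demo = nn.BatchNorm2d(3)
x_demo = torch.randn(8, 3, 4, 4)
manual = (x_demo - x_demo.mean(dim=(0, 2, 3), keepdim=True)) / \
         torch.sqrt(x_demo.var(dim=(0, 2, 3), unbiased=False, keepdim=True) + bn_demo.eps)
print(torch.allclose(bn_demo(x_demo), manual, atol=1e-5))  # True
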
# Padding
# torch.nn.functional.pad(input, pad, mode='constant', value=0)
# input: the input tensor
# pad: a tuple specifying which dimensions to pad and by how much, explained below
# mode: the fill mode; different modes fill with different values
# value: only used when mode='constant'; the fill is a constant equal to value
from torch.nn import functional as F
n = torch.Tensor(2,3,4,5)
padding = (2,2,3,4,1,1,1,1)
# (2,2): pad the last dimension with 2 units in front and 2 behind
# (3,4): pad the second-to-last dimension with 3 units in front and 4 behind
n = F.pad(n,padding)
print(n.size())
torch.Size([4, 5, 11, 9])
# Loss functions
import torch.nn as nn
# Binary cross-entropy
# torch.nn.BCELoss(weight, size_average, reduce, reduction)
# weight: per-class weighting of the loss
# size_average: whether to average the loss or sum it
# reduce: return the loss as a scalar
# reduction: 'mean' returns the averaged scalar, 'sum' returns the summed scalar
m = nn.Sigmoid()
x = torch.randn(3, requires_grad=True)
y = torch.randn(3).random_(2)
loss = nn.BCELoss()
out = loss(m(x),y)
print(out)

# Cross-entropy
# torch.nn.CrossEntropyLoss(weight, size_average, ignore_index, reduce, reduction)
# ignore_index: a class whose loss is ignored
# Formula: loss(x, class) = -log(exp(x[class]) / Σ exp(x[j])) = -x[class] + log(Σ exp(x[j]))
x = torch.randn(3,5,requires_grad=True)
y = torch.LongTensor(3).random_(5)
loss = nn.CrossEntropyLoss()
out = loss(x,y)
print(out)

# L1 loss
# torch.nn.L1Loss(size_average, reduce, reduction)
# Computes the absolute difference between predictions and targets
# Drawback: the closer to the target, the slower the convergence (the gradient magnitude stays constant)
loss = nn.L1Loss()
x = torch.randn(3,requires_grad=True)
y = torch.randn(3)
out = loss(x,y)
print(out)

# MSE loss
# torch.nn.MSELoss(size_average, reduce, reduction)
# Computes the squared difference between predictions and targets
# Drawback: large errors easily cause oscillation with large swings and can make the loss explode
loss = nn.MSELoss()
out = loss(x,y)
print(out)

# Smooth L1 loss
# torch.nn.SmoothL1Loss(size_average, reduce, reduction, beta=1.0)
# Formula: if |y_p - y| < 1: z = (y_p - y)^2 / 2, else: z = |y_p - y| - 1/2
# Combines the advantages of the two losses above
loss = nn.SmoothL1Loss()
out = loss(x,y)
print(out)

# Negative log-likelihood loss for a Poisson-distributed target
# torch.nn.PoissonNLLLoss(log_input, full, size_average, eps, reduce, reduction)
# log_input: whether the input is already in log form
# full: whether to compute the full loss (including the Stirling approximation term)
# eps: correction term to avoid log(0)
# When log_input=True:  loss(x, y) = e^x - x*y
# When log_input=False: loss(x, y) = x - y*log(x + eps)
loss = nn.PoissonNLLLoss()
out = loss(x,y)
print(out)

# KL divergence (relative entropy)
# torch.nn.KLDivLoss(size_average, reduce, reduction, log_target)
# none: computed element-wise.
# sum: sum over all elements, returns a scalar.
# mean: weighted average, returns a scalar.
# batchmean: averaged over the batch dimension.
# Formula: Σ P(x)(log P(x) - log Q(x))
# Note: the input is expected to hold log-probabilities (e.g. from log_softmax) and the
# target probabilities; with raw random tensors the result can be nan, as in the output below
loss = nn.KLDivLoss(reduction='mean')
out = loss(x,y)
print(out)

# MarginRankingLoss
# torch.nn.MarginRankingLoss(margin, size_average, reduce, reduction)
# Measures the ranking between two inputs
# margin: required difference between x1 and x2
# Formula: loss(x1, x2, y) = max(0, -y*(x1 - x2) + margin)
loss = nn.MarginRankingLoss()
x1 = torch.randn(3,requires_grad=True)
x2 = torch.randn(3,requires_grad=True)
y = torch.randn(3).sign() # sign returns 1 for values > 0, -1 for values < 0, and 0 for values == 0
out = loss(x1,x2,y)
print(out)

# Multi-label margin loss
# Computes the loss for multi-label classification problems
# torch.nn.MultiLabelMarginLoss(size_average, reduce, reduction)
# Formula: loss(x, y) = Σ max(0, 1 - (x[y[j]] - x[i])) / x.size(0)
loss = nn.MultiLabelMarginLoss()
x=torch.FloatTensor([[0.8,0.3,0.5]])
y=torch.LongTensor([[2,1,0]])
out = loss(x,y)
print(out)

# Binary classification loss
# torch.nn.SoftMarginLoss(size_average, reduce, reduction)
# Computes the two-class logistic loss
# Formula: loss(x, y) = Σ log(1 + exp(-y[i]*x[i])) / x.nelement()
# x.nelement() is the number of elements
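# A minimal sketch with ±1 targets
loss_demo = nn.SoftMarginLoss()
x_demo = torch.randn(4)
y_demo = torch.tensor([1., -1., 1., -1.])
print(loss_demo(x_demo, y_demo))  # mean of log(1 + exp(-y*x))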

# Multi-class hinge loss
# torch.nn.MultiMarginLoss(p=1, margin=1.0, weight, reduce, reduction)
# Computes the multi-class hinge (margin) loss
# Formula: loss(x, y) = Σ max(0, margin - x[y] + x[i])^p / x.size(0)
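# A minimal sketch for a single 3-class sample
loss_demo = nn.MultiMarginLoss()
x_demo = torch.tensor([[0.1, 0.2, 0.7]])
y_demo = torch.tensor([2])
print(loss_demo(x_demo, y_demo))  # (max(0,1-0.7+0.1) + max(0,1-0.7+0.2)) / 3 = 0.3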

# Triplet loss
# torch.nn.TripletMarginLoss(margin, p, eps, swap, size_average, reduce, reduction)
# In practice the three inputs are often called anchor, positive examples and negative examples
# Formula: L(a, p, n) = max(d(a, p) - d(a, n) + margin, 0)
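# A minimal sketch: random anchor/positive/negative batches of 8-d embeddings
loss_demo = nn.TripletMarginLoss(margin=1.0, p=2)
anc, pos, neg = torch.randn(5, 8), torch.randn(5, 8), torch.randn(5, 8)
print(loss_demo(anc, pos, neg))  # mean over the batch of max(d(a,p) - d(a,n) + 1, 0)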

# HingeEmbeddingLoss
# torch.nn.HingeEmbeddingLoss(margin, size_average, reduce, reduction)
# Computes the hinge loss on embedding outputs
# Formula: x if y == 1, else max(0, margin - x)
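# A minimal sketch with targets in {1, -1}
loss_demo = nn.HingeEmbeddingLoss(margin=1.0)
x_demo = torch.tensor([0.3, 0.8, 1.5])
y_demo = torch.tensor([1., -1., -1.])
print(loss_demo(x_demo, y_demo))  # (0.3 + max(0,1-0.8) + max(0,1-1.5)) / 3 ≈ 0.1667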

# Cosine similarity
# torch.nn.CosineEmbeddingLoss(margin, size_average, reduce, reduction)
# Computes a loss based on the cosine similarity of two vectors
# Formula: loss(x1, x2, y) = 1 - cos(x1, x2) if y == 1; max(0, cos(x1, x2) - margin) if y == -1
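# A minimal sketch on a pair of random batches
loss_demo = nn.CosineEmbeddingLoss(margin=0.0)
x1_demo, x2_demo = torch.randn(4, 16), torch.randn(4, 16)
y_demo = torch.tensor([1., -1., 1., -1.])
print(loss_demo(x1_demo, x2_demo, y_demo))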

# CTC loss
# torch.nn.CTCLoss(blank, reduction, zero_infinity)
# Used for classifying sequential (time-series) data
# Computes the loss between a continuous time series and a target sequence. CTCLoss sums the probabilities of all possible alignments of input to target, producing a loss value that is differentiable with respect to each input node. The alignment of input to target is assumed to be "many-to-one", which constrains the target sequence length to be ≤ the input length.
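# A minimal sketch (shapes follow the PyTorch docs): T=50 time steps, batch 2, 20 classes
ctc_demo = nn.CTCLoss(blank=0)
log_probs = torch.randn(50, 2, 20).log_softmax(2)   # (T, N, C) log-probabilities
targets = torch.randint(1, 20, (2, 10), dtype=torch.long)
input_lengths = torch.full((2,), 50, dtype=torch.long)
target_lengths = torch.full((2,), 10, dtype=torch.long)
print(ctc_demo(log_probs, targets, input_lengths, target_lengths))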

tensor(1.1647, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(1.3789, grad_fn=<NllLossBackward0>)
tensor(1.1449, grad_fn=<MeanBackward0>)
tensor(1.6839, grad_fn=<MseLossBackward0>)
tensor(0.6888, grad_fn=<SmoothL1LossBackward0>)
tensor(1.6886, grad_fn=<MeanBackward0>)
tensor(nan, grad_fn=<MeanBackward0>)
tensor(0.8855, grad_fn=<MeanBackward0>)
tensor(0.)

Training loop

def train(epoch):
    model.train()  # switch to training mode
    for data, label in train_loader:
        data, label = data.cuda(), label.cuda()  # move the data to the GPU
        optimizer.zero_grad()  # clear the optimizer's gradients
        out = model(data)  # forward pass
        loss = criterion(out, label)  # compute the loss
        loss.backward()  # backpropagate
        optimizer.step()  # update the parameters

Validation loop

def val(epoch):
    model.eval()  # switch to evaluation mode
    with torch.no_grad():  # no gradient updates
        for data, label in val_loader:
            data, label = data.cuda(), label.cuda()  # move the data to the GPU
            out = model(data)  # forward pass
            loss = criterion(out, label)  # compute the loss

# Optimizers
# torch.optim.SGD
# torch.optim.ASGD
# torch.optim.Adadelta
# torch.optim.Adagrad
# torch.optim.Adam
# torch.optim.AdamW
# torch.optim.Adamax
# torch.optim.RAdam
# torch.optim.NAdam
# torch.optim.SparseAdam
# torch.optim.LBFGS
# torch.optim.RMSprop
# torch.optim.Rprop
# optimizer attributes: defaults stores the optimizer's hyperparameters; state caches per-parameter state; param_groups manages the parameter groups
# optimizer methods: zero_grad() clears the gradients of the managed parameters; step() updates the parameters; add_param_group() adds a parameter group; load_state_dict() loads saved optimizer state; state_dict() returns the optimizer's current state dictionary
import os
import torch

# weights drawn from a normal distribution  --> 2 x 2
weight = torch.randn((2, 2), requires_grad=True)
# set the gradient to an all-ones matrix  --> 2 x 2
weight.grad = torch.ones((2, 2))
# print the current weight data and grad
print("The data of weight before step:\n{}".format(weight.data))
print("The grad of weight before step:\n{}".format(weight.grad))
# instantiate the optimizer
optimizer = torch.optim.SGD([weight], lr=0.1, momentum=0.9)
# take one optimization step
optimizer.step()
# inspect the value and gradient after one step
print("The data of weight after step:\n{}".format(weight.data))
print("The grad of weight after step:\n{}".format(weight.grad))
# zero the gradients
optimizer.zero_grad()
# check the gradient after zeroing
print("The grad of weight after optimizer.zero_grad():\n{}".format(weight.grad))
# print the parameter groups
print("optimizer.params_group is \n{}".format(optimizer.param_groups))
# check the parameter identity: the optimizer references the very same tensor object as weight
print("weight in optimizer:{}\nweight in weight:{}\n".format(id(optimizer.param_groups[0]['params'][0]), id(weight)))
# add a parameter group: weight2
weight2 = torch.randn((3, 3), requires_grad=True)
optimizer.add_param_group({"params": weight2, 'lr': 0.0001, 'nesterov': True})
# inspect the current parameter groups
print("optimizer.param_groups is\n{}".format(optimizer.param_groups))
# inspect the current state
opt_state_dict = optimizer.state_dict()
print("state_dict before step:\n", opt_state_dict)
# take 50 optimization steps
print(weight.data)
for _ in range(50):
    optimizer.step()
print(weight.data)
print(weight.grad)
# print the state after the steps
print("state_dict after step:\n", optimizer.state_dict())
# save the optimizer state
torch.save(optimizer.state_dict(),os.path.join(r"C:\Users\JX1402006\Desktop\web\label", "optimizer_state_dict.mt"))
print("----------done-----------")
# load the optimizer state
# state_dict = torch.load(r"C:\Users\JX1402006\Desktop\web\label\optimizer_state_dict.mt") # change to your own path
# optimizer.load_state_dict(state_dict)
# print("load state_dict successfully\n{}".format(state_dict))
# # print the final attributes
# print("\n{}".format(optimizer.defaults))
# print("\n{}".format(optimizer.state))
# print("\n{}".format(optimizer.param_groups))
The data of weight before step:
tensor([[-0.6448,  1.9165],
        [ 1.1273, -0.3190]])
The grad of weight before step:
tensor([[1., 1.],
        [1., 1.]])
The data of weight after step:
tensor([[-0.7448,  1.8165],
        [ 1.0273, -0.4190]])
The grad of weight after step:
tensor([[1., 1.],
        [1., 1.]])
The grad of weight after optimizer.zero_grad():
tensor([[1., 1.],
        [1., 1.]])
optimizer.params_group is 
[{'params': [tensor([[-0.7448,  1.8165],
        [ 1.0273, -0.4190]], requires_grad=True)], 'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False}]
weight in optimizer:2618474230864
weight in weight:2618474230864

optimizer.param_groups is
[{'params': [tensor([[-0.7448,  1.8165],
        [ 1.0273, -0.4190]], requires_grad=True)], 'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False}, {'params': [tensor([[ 0.3353, -0.1803, -1.7458],
        [-0.2609,  0.4420,  1.8614],
        [ 1.5091, -1.3670,  1.0478]], requires_grad=True)], 'lr': 0.0001, 'nesterov': True, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'maximize': False, 'foreach': None, 'differentiable': False}]
state_dict before step:
 {'state': {0: {'momentum_buffer': tensor([[1., 1.],
        [1., 1.]])}}, 'param_groups': [{'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0]}, {'lr': 0.0001, 'nesterov': True, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [1]}]}
tensor([[-0.7448,  1.8165],
        [ 1.0273, -0.4190]])
tensor([[-42.6866, -40.1253],
        [-40.9144, -42.3607]])
tensor([[1., 1.],
        [1., 1.]])
state_dict after step:
 {'state': {0: {'momentum_buffer': tensor([[9.9536, 9.9536],
        [9.9536, 9.9536]])}}, 'param_groups': [{'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0]}, {'lr': 0.0001, 'nesterov': True, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [1]}]}
----------done-----------
# A small experiment
import torch.nn.functional as F
x = torch.linspace(-1,1,100)
# x = torch.unsqueeze(x,dim=1)
y = 2*x.pow(2)

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.para = torch.randn(3,requires_grad=True)
    
    def forward(self,x):
        out = torch.zeros_like(x)
        for i,p in enumerate(self.para):
            out += p*x.pow(i)
        return out


loss = nn.SmoothL1Loss()
epochs=np.arange(200)
net = Net()
print(net.para)
opt_sgd = torch.optim.SGD([net.para], lr=0.1)
# opt_Adam = torch.optim.Adam(net.parameters(), lr=0.1)
out_sgd = []
for i in range(len(epochs)):
    opt_sgd.zero_grad()
    out = loss(net(x),y)
    out.backward()
    # print(out.data)
    out_sgd.append(out.data)
    opt_sgd.step()
print(net.para)
plt.plot(epochs,out_sgd)
x = torch.linspace(-0.5, 0.5, 5)
y = net(x)
t = 2*x.pow(2)
print(y, t)
plt.show()
tensor([ 0.1017, -1.5970, -1.1859], requires_grad=True)
tensor([ 0.2457, -0.0054,  1.3370], requires_grad=True)
tensor([0.5826, 0.3306, 0.2457, 0.3279, 0.5772], grad_fn=<AddBackward0>) tensor([0.5000, 0.1250, 0.0000, 0.1250, 0.5000])

[figure: SGD training loss vs. epoch]

# Backpropagation
x = torch.tensor([1,2,3],dtype=torch.float,requires_grad=True)
y = x**2 * 2
# y.requires_grad = True
print(y)
y.sum().backward()
print(x.grad)
x.grad.data.zero_()
tensor([ 2.,  8., 18.], grad_fn=<MulBackward0>)
tensor([ 4.,  8., 12.])
tensor([0., 0., 0.])
# Manual differentiation
x = np.linspace(-1,1,100)
y = np.exp(x)
p = torch.randn(3)
c_p = p.clone()
epochs = 200
lr = 0.2
def func(x,para):
    l_p = len(para)
    fun = 0
    for i in range(l_p):
        fun += para[i] * x**i
    return fun
for i in range(epochs):
    loss = 1/2*(func(x,p) - y)**2
    loss_u = func(x,p) - y
    loss_p1 = loss_u
    loss_p2 = loss_u*x
    loss_p3 = loss_u*x**2
    p[0]-= lr*loss_p1.mean()
    p[1]-=lr*loss_p2.mean()
    p[2]-=lr*loss_p3.mean()
plt.plot(x,y)
plt.plot(x,func(x,p),color='r')
plt.show()
print(p)

[figure: exp(x) and the fitted quadratic in red]

tensor([1.0035, 1.1058, 0.5175])
# The same fit using the PyTorch framework
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.p = c_p
        self.p.requires_grad=True
        if hasattr(self.p.grad,'data'):
           self.p.grad.data.zero_()

    def forward(self,x):
        l_p = len(self.p)
        fun = 0
        for i in range(l_p):
            fun += self.p[i] * x.pow(i)
        return fun

net = Net()
opt = torch.optim.SGD([net.p],lr=lr,momentum=0)
loss_l = []
for i in range(epochs):
    loss = (1/2*(net(torch.from_numpy(x)) - torch.from_numpy(y))**2).mean()
    loss_l.append(float(loss))
    opt.zero_grad()
    loss.backward()
    opt.step()
# print(np.arange(200),len(loss_l))
plt.plot(np.arange(200),np.array(loss_l))
plt.show()
print(net.p)

[figure: training loss over 200 epochs]

tensor([1.0035, 1.1058, 0.5175], requires_grad=True)
# Residual network (ResNet)

# Convolution helpers (following torchvision's definitions; conv1x1 is required
# by the Bottleneck and ResNet classes below)
from typing import Callable, List, Optional, Type, Union
from torch import Tensor

def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)

def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class BasicBlock(nn.Module):
    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError("BasicBlock only supports groups=1 and base_width=64")
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        identity = x  # keep a copy of x for the shortcut

        out = self.conv1(x)  # convolution
        out = self.bn1(out)  # batch normalization
        out = self.relu(out)  # activation

        out = self.conv2(out)  # convolution
        out = self.bn2(out)  # batch normalization

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity  # add the shortcut (residual connection)
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
    # while original implementation places the stride at the first 1x1 convolution(self.conv1)
    # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
    # This variant is also known as ResNet V1.5 and improves accuracy according to
    # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.

    expansion: int = 4  # multiplier for the number of output channels

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.0)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

        # Bottleneck's forward function is similar to BasicBlock's, so no extra comments
    def forward(self, x: Tensor) -> Tensor:
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

# The ResNet network
class ResNet(nn.Module):
    def __init__(
        self,
        block: Type[Union[BasicBlock, Bottleneck]], # choice of basic building block
        layers: List[int], # number of blocks per layer, e.g. [3,4,6,3]
        num_classes: int = 1000, # number of classes
        zero_init_residual: bool = False, # initialization
        
        ####### other convolution variants, not relevant to the basic ResNet #######
        groups: int = 1,
        width_per_group: int = 64,
        replace_stride_with_dilation: Optional[List[bool]] = None,
        #########################################
        
        norm_layer: Optional[Callable[..., nn.Module]] = None, # normalization layer
    ) -> None:
        super().__init__()
        # _log_api_usage_once(self)  # torchvision-internal logging; not defined here
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer
		
        self.inplanes = 64 # input channels
        
        ####### other convolution variants, not relevant to the basic ResNet #######
        self.dilation = 1 # dilated convolution
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError(
                "replace_stride_with_dilation should be None "
                f"or a 3-element tuple, got {replace_stride_with_dilation}"
            )
        self.groups = groups
        self.base_width = width_per_group
        #########################################
        
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # _make_layer achieves the layer-by-layer design
        self.layer1 = self._make_layer(block, 64, layers[0])  # corresponds to conv2_x
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])  # corresponds to conv3_x
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])  # corresponds to conv4_x
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2])  # corresponds to conv5_x
        # classification head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
		
        # model initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck) and m.bn3.weight is not None:
                    nn.init.constant_(m.bn3.weight, 0)  # type: ignore[arg-type]
                elif isinstance(m, BasicBlock) and m.bn2.weight is not None:
                    nn.init.constant_(m.bn2.weight, 0)  # type: ignore[arg-type]
    # layer-by-layer construction
    def _make_layer(
        self,
        block: Type[Union[BasicBlock, Bottleneck]], # choice of building block
        planes: int,  # base number of channels
        blocks: int, # number of blocks
        stride: int = 1, # stride
        dilate: bool = False, # dilated convolution, not relevant here
    ) -> nn.Sequential:
        norm_layer = self._norm_layer
        downsample = None # whether the shortcut needs downsampling
        #################### not relevant ####################
        previous_dilation = self.dilation 
        if dilate:
            self.dilation *= stride
            stride = 1
        #############################################
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )
		
        # collect the blocks of this layer in a list
        layers = []
        layers.append(
            block(
                self.inplanes, planes, stride, downsample, self.groups, self.base_width, previous_dilation, norm_layer
            )
        )
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(
                block(
                    self.inplanes,
                    planes,
                    groups=self.groups,
                    base_width=self.base_width,
                    dilation=self.dilation,
                    norm_layer=norm_layer,
                )
            )
        # turn the list of blocks into a network via nn.Sequential
        return nn.Sequential(*layers)

    def _forward_impl(self, x: Tensor) -> Tensor:
        # See note [TorchScript super()]
        x = self.conv1(x)  # conv1   x shape [1 64 112 112]
        x = self.bn1(x)   # batch normalization
        x = self.relu(x)  # activation
        x = self.maxpool(x)  # the 3x3 maxpool of conv2_x        x shape [1 64 56 56]

        x = self.layer1(x) # layer 1
        x = self.layer2(x) # layer 2
        x = self.layer3(x) # layer 3
        x = self.layer4(x) # layer 4

        x = self.avgpool(x) # adaptive average pooling
        x = torch.flatten(x, 1) 
        x = self.fc(x) # classification

        return x

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x) 
# A worked example

# GPU configuration
# Option 1: use os.environ; with this approach nothing else is needed to use the GPU
import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '0,1' # make GPUs 0 and 1 visible

# Option 2: use a "device" object and move the variables that need the GPU with .to(device)
# device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu") # use GPU 1

## Other hyperparameters: batch_size, num_workers, learning rate and total epochs
batch_size = 256
num_workers = 4   # Windows users should set this to 0, otherwise multiprocessing errors occur
lr = 1e-4
epochs = 20

# Building a dataset
from torchvision import datasets
from torch.utils.data import DataLoader, Dataset
import cv2
from pathlib import Path
import numpy as np
image_suffix = ['.jpg','.png','.bmp','.jpeg']
label_suffix = ['.txt']
# Example: YOLO-style data loading
class YoLoData(Dataset):
    def __init__(self,img_path:str, label_path:str, img_size:tuple):
        self.img_path = img_path
        self.label_path = label_path
        self.img_size = img_size
        self.images, self.labels = self.transform()
    def __getitem__(self, idx):
        img = self.images[idx]
        img = img.astype(np.float32)
        img = img.transpose(2,0,1)
        img = torch.from_numpy(img)
        img /= 255.0
        label = self.labels[idx]
        label = torch.from_numpy(label)
        return img, label
    def __len__(self):
        return len(self.images)
    def transform(self):
        images = [item for item in Path(self.img_path).iterdir() if item.suffix.lower() in image_suffix]
        labels = [item for item in Path(self.label_path).iterdir() if item.suffix.lower() in label_suffix]
        labels_np = []
        images_np = []
        dw,dh = self.img_size
        for item in images:
            img = cv2.imread(str(item))
            h,w,c = img.shape
            scale = round(w/dw) if round(w / dw) > round(h/dh) else round(h/dh)
            img = cv2.resize(img,(round(w/scale), round(h/scale)))
            pw = round((dw-w/scale)/2)
            ph = round((dh-h/scale)/2)
            top = round(ph -0.1)
            bw = round(ph + 0.1)
            lt = round(pw-0.1)
            rt = round(pw + 0.1)
            img = cv2.copyMakeBorder(img,top=top,bottom=bw,left=lt,right=rt,borderType=cv2.BORDER_CONSTANT,value=(0,0,0))
            images_np.append(img)
        for item in labels:
            with open(item,'r',encoding='utf-8') as f:
                data = [item.split() for item in f.readlines()]
            data_np = np.array(data)
            labels_np.append(data_np.astype(np.float32))
        return images_np, labels_np

yolo_train = YoLoData(r'C:\Users\JX1402006\Desktop\web\label', r'C:\Users\JX1402006\Desktop\web\label',(640,640))
# yolo_test =  YoloData()
# print(torch.tensor(yolo_train.images))
train_dataLoader = DataLoader(yolo_train, batch_size=batch_size, shuffle=True, drop_last=True)

# Building the network
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3,9,3,padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2,stride=2),
            nn.Dropout(0.1),
            nn.Conv2d(9,18,3,padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, stride=2),
            nn.Dropout(0.2))
        self.fc = nn.Sequential(
            nn.Linear(18*160*160,2048),
            nn.ReLU(),
            nn.Linear(2048,10))
        # self.fc = nn.Layer(2048, 2)
    def forward(self, x):
        x = self.conv(x)
        x = x.view(-1, 18*160*160)
        x = self.fc(x)
        fc = nn.Softmax(dim=1)
        return fc(x)

model = Net()
# model = model.cuda()
loss = nn.SmoothL1Loss()
opt = torch.optim.SGD(model.parameters(), lr=lr)

# Training loop
def train():
    for data, label in train_dataLoader:
        opt.zero_grad()
        l = loss(model(data), label)
        l.backward()
        opt.step()

# Validation loop

# Saving the model (this saves the whole model object)
save_path = r'./model.mt'
torch.save(model,save_path)
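
# A more portable alternative (a sketch): save only the parameters via state_dict
# torch.save(model.state_dict(), r'./model_state.mt')
# model.load_state_dict(torch.load(r'./model_state.mt'))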

# A complete example
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
# GPU configuration, two options
## Option 1: use os.environ
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Option 2: use a "device" object and move the variables that need the GPU with .to(device)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Other hyperparameters: batch_size, num_workers, learning rate and total epochs
batch_size = 256
num_workers = 4   # Windows users should set this to 0, otherwise multiprocessing errors occur
lr = 1e-4
epochs = 20

# First set up the data transforms
from torchvision import transforms

image_size = 28
data_transform = transforms.Compose([
    transforms.ToPILImage(),  
     # this step depends on how the data is read later; it is unnecessary with the built-in dataset readers
    transforms.Resize(image_size),
    transforms.ToTensor()
])

## Reading option 1: use the torchvision built-in dataset; downloading may take a while
from torchvision import datasets

train_data = datasets.FashionMNIST(root='./', train=True, download=True, transform=data_transform)
test_data = datasets.FashionMNIST(root='./', train=False, download=True, transform=data_transform)

## Reading option 2: read csv data and build a custom Dataset class
# csv download link: https://www.kaggle.com/zalando-research/fashionmnist
class FMDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform
        self.images = df.iloc[:,1:].values.astype(np.uint8)
        self.labels = df.iloc[:, 0].values
        
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        image = self.images[idx].reshape(28,28,1)
        label = int(self.labels[idx])
        if self.transform is not None:
            image = self.transform(image)
        else:
            image = torch.tensor(image/255., dtype=torch.float)
        label = torch.tensor(label, dtype=torch.long)
        return image, label

train_df = pd.read_csv("./FashionMNIST/fashion-mnist_train.csv")
test_df = pd.read_csv("./FashionMNIST/fashion-mnist_test.csv")
train_data = FMDataset(train_df, data_transform)
test_data = FMDataset(test_df, data_transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)
# Model design
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, stride=2),
            nn.Dropout(0.3),
            nn.Conv2d(32, 64, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, stride=2),
            nn.Dropout(0.3)
        )
        self.fc = nn.Sequential(
            nn.Linear(64*4*4, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )
        
    def forward(self, x):
        x = self.conv(x)
        x = x.view(-1, 64*4*4)
        x = self.fc(x)
        # x = nn.functional.normalize(x)
        return x

model = Net()
model = model.cuda()

# Use the CrossEntropy loss built into torch.nn
# PyTorch automatically converts integer labels to one-hot form when computing the CE loss
# Labels must start from 0, and the model must not end in a softmax layer (the loss works on logits); this shows that the parts of a PyTorch training setup are not independent and must be designed together
criterion = nn.CrossEntropyLoss()
# Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train(epoch):
    model.train()
    train_loss = 0
    for data, label in train_loader:
        data, label = data.cuda(), label.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*data.size(0)
    train_loss = train_loss/len(train_loader.dataset)
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))

def val(epoch):       
    model.eval()
    val_loss = 0
    gt_labels = []
    pred_labels = []
    with torch.no_grad():
        for data, label in test_loader:
            data, label = data.cuda(), label.cuda()
            output = model(data)
            preds = torch.argmax(output, 1)
            gt_labels.append(label.cpu().data.numpy())
            pred_labels.append(preds.cpu().data.numpy())
            loss = criterion(output, label)
            val_loss += loss.item()*data.size(0)
    val_loss = val_loss/len(test_loader.dataset)
    gt_labels, pred_labels = np.concatenate(gt_labels), np.concatenate(pred_labels)
    acc = np.sum(gt_labels==pred_labels)/len(pred_labels)
    print('Epoch: {} \tValidation Loss: {:.6f}, Accuracy: {:6f}'.format(epoch, val_loss, acc))

for epoch in range(1, epochs+1):
    train(epoch)
    val(epoch)
# Model definition
# nn.Sequential()
conv = nn.Sequential(
    nn.Conv2d(3,9,3),
    nn.ReLU(),
    nn.MaxPool2d(2,stride=2)
)
print(conv)
# x = conv(x)

# nn.ModuleList()
conv = nn.ModuleList([nn.Conv2d(3,9,3),
    nn.ReLU(),
    nn.MaxPool2d(2,stride=2)])
print(conv)
# for model in conv:
#     x = model(x)

# nn.ModuleDict()
conv = nn.ModuleDict({'conv1':nn.Conv2d(3,9,3),'relu':nn.ReLU()})
print(conv)
# for model in conv.values():
#      x = model(x)

Sequential(
  (0): Conv2d(3, 9, kernel_size=(3, 3), stride=(1, 1))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
ModuleList(
  (0): Conv2d(3, 9, kernel_size=(3, 3), stride=(1, 1))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
ModuleDict(
  (conv1): Conv2d(3, 9, kernel_size=(3, 3), stride=(1, 1))
  (relu): ReLU()
)
# Modular model design: the U-Net network
import torch
import torch.nn as nn
import torch.nn.functional as F

class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, bilinear=False):
        super().__init__()

        # if bilinear, use the normal convolutions to reduce the number of channels
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is CHW
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        # if you have padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)


class UNet(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=False):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)
        self.up1 = Up(1024, 512 // factor, bilinear)
        self.up2 = Up(512, 256 // factor, bilinear)
        self.up3 = Up(256, 128 // factor, bilinear)
        self.up4 = Up(128, 64, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits

n = torch.Tensor(1,3,136,136)
down1 = Down(3,6)
n = down1(n)
down2 = Down(6,6)
n1 = down2(n)
print(n1.size(), n.size())
up = Up(12,6,True)
x = up(n1,n)
print(x.size())
torch.Size([1, 6, 34, 34]) torch.Size([1, 6, 68, 68])
torch.Size([1, 6, 68, 68])
# Modifying a model
# Modifying the model output
from collections import OrderedDict
import torchvision.models as models
net = models.resnet50()
print(net)
classify = nn.Sequential(OrderedDict([('fc1',nn.Linear(2048,128)),
                                      ('relu1',nn.ReLU(inplace=True)),
                                       ('fc2',nn.Linear(128, 10))]))
net.fc = classify
print(net)

# Modifying the model input
class Model(nn.Module):
    def __init__(self, net):
        super(Model, self).__init__()
        self.net = net
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc_add = nn.Linear(1001, 10, bias=True)
        self.output = nn.Softmax(dim=1)
        
    def forward(self, x, add_variable):
        x = self.net(x)
        x = torch.cat((self.dropout(self.relu(x)), add_variable.unsqueeze(1)),1)
        x = self.fc_add(x)
        x = self.output(x)
        return x

# Adding an extra output
class Model(nn.Module):
    def __init__(self, net):
        super(Model, self).__init__()
        self.net = net
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(1000, 10, bias=True)
        self.output = nn.Softmax(dim=1)
        
    def forward(self, x, add_variable):
        x1000 = self.net(x)
        x10 = self.dropout(self.relu(x1000))
        x10 = self.fc1(x10)
        x10 = self.output(x10)
        return x10, x1000

net = models.resnet50()
opt = torch.optim.SGD(net.parameters(), lr=0.1)
print(opt.state_dict())
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): Bottleneck(
      (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (2): Bottleneck(
      (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
  )
  (layer2): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): Bottleneck(
      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (2): Bottleneck(
      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (3): Bottleneck(
      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
  )
  (layer3): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): Bottleneck(
      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (2): Bottleneck(
      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (3): Bottleneck(
      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (4): Bottleneck(
      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (5): Bottleneck(
      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
  )
  (layer4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): Bottleneck(
      (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (2): Bottleneck(
      (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Linear(in_features=2048, out_features=1000, bias=True)
)
ResNet(
  ... (conv1 / bn1 / relu / maxpool and layer1 through layer4 are identical to the ResNet-50 backbone printed above) ...
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Sequential(
    (fc1): Linear(in_features=2048, out_features=128, bias=True)
    (relu1): ReLU(inplace=True)
    (fc2): Linear(in_features=128, out_features=10, bias=True)
  )
)
{'state': {}, 'param_groups': [{'lr': 0.1, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1, 2, ..., 160]}]}
# Saving and loading models
# Models are usually saved in one of three formats: .pkl, .pt, or .pth.
# A PyTorch model has two parts: the structure (a class inheriting from nn.Module)
# and the weights (a dict whose keys are layer names and whose values are weight tensors).
# Saving therefore comes in two forms: save the whole model (structure + weights),
# or save only the state dict (weights).
torch.save(model, path)               # save the whole model
torch.save(model.state_dict(), path)  # save only the weights
# Save and load on a single GPU
torch.save(model, save_dir)
loaded_model = torch.load(save_dir)
loaded_model.cuda()
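# The weights-only counterpart on a single GPU, as a minimal sketch: the model
# class must be instantiated first, then the state dict is loaded into it.
loaded_model = models.resnet152()   # structure must match the saved weights
loaded_model.load_state_dict(torch.load(save_dir))
loaded_model.cuda()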


# Save on a single GPU, load on multiple GPUs
import os  # needed for CUDA_VISIBLE_DEVICES below

# Save + load the whole model
torch.save(model, save_dir)

os.environ['CUDA_VISIBLE_DEVICES'] = '1,2'   # replace with the GPU ids you want to use
loaded_model = torch.load(save_dir)
loaded_model = nn.DataParallel(loaded_model).cuda()

# Save + load only the weights
torch.save(model.state_dict(), save_dir)

os.environ['CUDA_VISIBLE_DEVICES'] = '1,2'   # replace with the GPU ids you want to use
loaded_model = models.resnet152()   # the model structure must be defined first
loaded_model.load_state_dict(torch.load(save_dir))
loaded_model = nn.DataParallel(loaded_model).cuda()
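# Aside (a hedged sketch): nn.DataParallel prefixes parameter names with
# 'module.', so saving the inner module's state dict keeps the keys loadable
# on a single GPU without stripping the prefix by hand.
torch.save(loaded_model.module.state_dict(), save_dir)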


# Saving other training state (a full checkpoint)
torch.save({
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'lr_scheduler': lr_scheduler.state_dict(),
        'epoch': epoch,
        'args': args,
    }, checkpoint_path)
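# Restoring the checkpoint above, a minimal sketch: it assumes model, optimizer
# and lr_scheduler have already been constructed with the same classes as when saving.
checkpoint = torch.load(checkpoint_path)
model.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
start_epoch = checkpoint['epoch'] + 1   # resume from the next epoch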
# Custom loss function: Dice loss, commonly used for binary segmentation
class DiceLoss(nn.Module):
    def __init__(self, weight=None, size_average=True):
        super(DiceLoss, self).__init__()

    def forward(self, inputs, targets, smooth=1):
        inputs = torch.sigmoid(inputs)   # F.sigmoid is deprecated; use torch.sigmoid
        inputs = inputs.view(-1)         # flatten predictions and targets
        targets = targets.view(-1)
        intersection = (inputs * targets).sum()
        dice = (2. * intersection + smooth) / (inputs.sum() + targets.sum() + smooth)
        return 1 - dice                  # minimizing 1 - dice maximizes overlap
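# Usage sketch (the shapes here are illustrative only):
criterion = DiceLoss()
logits = torch.randn(4, 1, 8, 8)                    # raw, unnormalized model outputs
masks = torch.randint(0, 2, (4, 1, 8, 8)).float()   # binary ground-truth masks
loss = criterion(logits, masks)
print(loss)   # a scalar; 0 means perfect overlap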
# Dynamically adjusting the learning rate
# Built-in schedulers live in torch.optim.lr_scheduler; a manual version:
def adjust_learning_rate(optimizer, epoch):
    lr = args.lr * (0.1 ** (epoch // 30))   # decay the base lr by 10x every 30 epochs
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
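# The same schedule via the built-in torch.optim.lr_scheduler.StepLR, a minimal
# sketch (the tiny model and optimizer below are placeholders for illustration):
toy_model = nn.Linear(10, 2)
toy_optimizer = torch.optim.SGD(toy_model.parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.StepLR(toy_optimizer, step_size=30, gamma=0.1)
for epoch in range(31):
    # ... forward / backward / toy_optimizer.step() would run here ...
    scheduler.step()   # multiplies the lr by 0.1 once every 30 epochs
print(toy_optimizer.param_groups[0]['lr'])   # 0.010 after crossing the 30-epoch boundary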
# Model fine-tuning: torchvision ships many common architectures
import torchvision.models as models
resnet18 = models.resnet18()
# resnet18 = models.resnet18(pretrained=False) is equivalent to the line above
alexnet = models.alexnet()
vgg16 = models.vgg16()
squeezenet = models.squeezenet1_0()
densenet = models.densenet161()
inception = models.inception_v3()
googlenet = models.googlenet()
shufflenet = models.shufflenet_v2_x1_0()
mobilenet_v2 = models.mobilenet_v2()
mobilenet_v3_large = models.mobilenet_v3_large()
mobilenet_v3_small = models.mobilenet_v3_small()
resnext50_32x4d = models.resnext50_32x4d()
wide_resnet50_2 = models.wide_resnet50_2()
mnasnet = models.mnasnet1_0()
# Training only specific layers
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

# Change resnet18's head from 1000 classes to 4
import torchvision.models as models
# freeze the gradients of the pretrained backbone
feature_extract = True
model = models.resnet18(pretrained=True)   # newer torchvision prefers weights=models.ResNet18_Weights.DEFAULT
set_parameter_requires_grad(model, feature_extract)
# replace the classification head; the new layer's parameters require grad by default
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=4, bias=True)
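# Only the new head has requires_grad=True, so pass just those parameters to
# the optimizer (a minimal sketch; the hyperparameters are illustrative):
params_to_update = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params_to_update, lr=0.001, momentum=0.9)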

# Fine-tuning with timm (a large collection of pretrained image models)
import timm
avail_models = timm.list_models(pretrained=True)
print(len(avail_models))
1373
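# A minimal sketch of creating a pretrained timm model and adapting its head;
# the model name is illustrative, and downloading the weights needs network access.
# timm.create_model's num_classes argument replaces the classifier for you.
model = timm.create_model('resnet50', pretrained=True, num_classes=10)
x = torch.randn(1, 3, 224, 224)
print(model(x).shape)   # torch.Size([1, 10])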