PyTorch Neural Network Basics

Blocks and Layers
Block: a block can describe a single layer, a component made up of multiple layers, or the entire model itself. Working with the block abstraction lets us compose blocks into even larger components.
From a programming standpoint, a block is represented by a class. Any of its subclasses must define a forward propagation function that transforms its input into output, and must store any necessary parameters.
Let's first look at the MLP code:

import torch
from torch import nn
net = nn.Sequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
x = torch.rand(2,20)
net(x)   # shorthand for net.__call__(x)
tensor([[ 0.2305,  0.0688, -0.0887,  0.0833,  0.1858,  0.0170, -0.0810,  0.1837,
         -0.2655, -0.0071],
        [ 0.0535,  0.0221, -0.0006, -0.0011,  0.0999, -0.0787, -0.1264,  0.0393,
         -0.1978, -0.0057]], grad_fn=<AddmmBackward>)

Custom Blocks

from torch.nn import functional as F
# Custom block
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(20, 256)
        self.linear2 = nn.Linear(256, 10)

    def forward(self,x):
        x = self.linear1(x)
        x = F.relu(x)
        x = self.linear2(x)
        return x

# Instantiate
mlp = MLP()
print(mlp(x))

tensor([[ 0.0425, -0.1575,  0.0146, -0.4185, -0.2152,  0.1353, -0.1584,  0.0866,
          0.0052, -0.3651],
        [-0.0027, -0.0181, -0.0613, -0.2916, -0.0459,  0.1007, -0.1310,  0.0546,
          0.1019, -0.2634]], grad_fn=<AddmmBackward>)

Sequential Blocks

# Sequential block
class MySequential(nn.Module):
    def __init__(self,*args):
        super(MySequential, self).__init__()
        for idx,module in enumerate(args):
            self._modules[str(idx)] = module
            # store each module in _modules, which is an OrderedDict

    def forward(self,x):
        for block in self._modules.values():
            x = block(x)
        return x
# Instantiate
net = MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
print(net(x))

tensor([[ 0.1311, -0.0544, -0.0665, -0.0638,  0.1783,  0.0375,  0.0173,  0.0048,
         -0.0642, -0.0604],
        [ 0.1811, -0.0357, -0.0488, -0.1271,  0.0690,  0.0487, -0.0540, -0.0443,
         -0.1325, -0.0720]], grad_fn=<AddmmBackward>)

The enumerate() function pairs the elements of an iterable (such as a list, tuple, or string) with their indices, yielding both the index and the value at the same time; it is typically used in a for loop.
See the detailed introduction to the usage of the enumerate() function.
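
As a quick illustration (a minimal sketch, not part of the original code), enumerate() yields (index, value) pairs:

modules = [nn.Linear(20,256), nn.ReLU(), nn.Linear(256,10)]
for idx, module in enumerate(modules):
    print(idx, type(module).__name__)
# 0 Linear
# 1 ReLU
# 2 Linear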

Executing Code in the Forward Propagation Function

# Executing code in the forward propagation function
class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super(FixedHiddenMLP, self).__init__()
        # Random weight parameters that do not compute gradients, so they stay constant during training
        self.rand_weight = torch.rand((20,20),requires_grad = False)
        self.linear = nn.Linear(20,20)

    def forward(self,x):
        x = self.linear(x)
        # Use the constant parameter created above, along with the relu and mm functions
        x = F.relu(torch.mm(x,self.rand_weight)+1)
        # Reuse the fully connected layer; this is equivalent to two fully connected layers sharing parameters
        x = self.linear(x)
        # Control flow
        while x.abs().sum() > 1:
            x/=2
        return x.sum()

# Instantiate
net = FixedHiddenMLP()
print(net(x))

tensor(0.0136, grad_fn=<SumBackward0>)

Mixing and Matching Blocks

# Mix-and-match blocks
class NestMLP(nn.Module):
    def __init__(self):
        super(NestMLP, self).__init__()
        self.net = nn.Sequential(nn.Linear(20,64),nn.ReLU(),nn.Linear(64,32),nn.ReLU())
        self.linear = nn.Linear(32,16)

    def forward(self,x):
        x = self.net(x)
        x = self.linear(x)
        return x
# Instantiate
net = nn.Sequential(NestMLP(),nn.Linear(16,20),FixedHiddenMLP())
print(net(x))

tensor(-0.2206, grad_fn=<SumBackward0>)

Summary:

  • A block can be composed of many layers, and a block can also be composed of many blocks
  • Blocks can contain code
  • Blocks handle a lot of internal bookkeeping, including parameter initialization and backpropagation
  • The sequential concatenation of layers and blocks is handled by the Sequential block

Parameter Access
Define a model

import torch
from torch import nn
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1))
x = torch.rand(size=(2,4))
print(x)
print(net(x))
tensor([[0.5786, 0.0265, 0.9286, 0.4053],
        [0.0824, 0.2364, 0.3520, 0.9443]])
tensor([[0.2221],
        [0.2655]], grad_fn=<AddmmBackward>)

Parameter Access

# Parameter access
print(net[2].state_dict())   # parameters of the second fully connected layer
print(type(net[2].bias))
print(net[2].bias)
print(net[2].bias.data)
OrderedDict([('weight', tensor([[ 0.3187, -0.3033,  0.3075,  0.1933, -0.0262,  0.1024, -0.2199,  0.2016]])), ('bias', tensor([-0.3335]))])
<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.3335], requires_grad=True)
tensor([-0.3335])

Accessing All Parameters at Once

# Access all parameters at once
print(*[(name,param.shape) for name,param in net[0].named_parameters()])
print(*[(name,param.shape) for name,param in net.named_parameters()])
('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))
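
Since named_parameters() exposes keys such as '0.weight' and '2.bias', a single parameter can also be looked up by name from the network's state_dict. A minimal sketch:

# Access one parameter by its key in the state_dict
print(net.state_dict()['2.bias'].data)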

Collecting Parameters from Nested Blocks

# Collect parameters from nested blocks
def block1():
    return nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,4),nn.ReLU())
def block2():
    net = nn.Sequential()
    for i in range(4):
        net.add_module(f'block {i}',block1())
    return net
rgnet = nn.Sequential(block2(),nn.Linear(4,1))
print(rgnet(x))
tensor([[-0.5766],
        [-0.5767]], grad_fn=<AddmmBackward>)

Parameter Management

print(rgnet)
print(rgnet[0][1][0].bias.data)   # bias of the first layer of the second sub-block within the first major block
Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)
tensor([ 0.0760,  0.1510,  0.2018,  0.1264, -0.4960,  0.3398, -0.0903,  0.0276])
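
The printed structure also shows how parameter names are formed: each level of nesting contributes one segment. A minimal sketch (not in the original) that walks the hierarchical names of rgnet:

# Parameter names mirror the nesting, e.g. '0.block 1.0.bias'
for name, param in rgnet.named_parameters():
    print(name, param.shape)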

Built-in Parameter Initialization

# Initialize all weights as Gaussian random variables with standard deviation 0.01 and set all biases to 0
def init_normal(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight,mean=0,std=0.01)
        nn.init.zeros_(m.bias)

net.apply(init_normal)
print(net[0].weight.data[0],net[0].bias.data[0])

# Initialize all weight parameters to a given constant, e.g. 1
def init_constant(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight,1)
        nn.init.zeros_(m.bias)

net.apply(init_constant)
print(net[0].weight.data[0],net[0].bias.data[0])

# Apply different initialization methods to certain blocks
def xavier(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)
def init_42(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight,42)

net[0].apply(xavier)
net[2].apply(init_42)
print(net[0].weight.data[0])
print(net[2].weight.data)
tensor([-0.0013,  0.0090,  0.0025, -0.0163]) tensor(0.)
tensor([1., 1., 1., 1.]) tensor(0.)
tensor([ 0.5687, -0.2902, -0.1938,  0.4916])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])

Custom Parameter Initialization

# Custom initialization
def my_init(m):
    if type(m) == nn.Linear:
        print(
            "Init",
            *[(name, param.shape) for name, param in m.named_parameters()][0])
        nn.init.uniform_(m.weight, -10, 10)
        m.weight.data *= m.weight.data.abs() >= 5

net.apply(my_init)
print(net[0].weight[:2])
Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])
tensor([[-0.0000, -9.5939,  0.0000,  0.0000],
        [-8.4921, -6.8208,  5.8099,  9.4989]], grad_fn=<SliceBackward>)

Setting Parameters Directly

# Set parameters directly
net[0].weight.data[:] += 1
net[0].weight.data[0,0] = 42
print(net[0].weight.data[0])
tensor([42.0000,  1.0000, -5.1510,  8.2907])

Parameter Tying
Sharing parameters across multiple layers: define a dense layer, then use its parameters to set the parameters of another layer.

# Parameter tying
shared = nn.Linear(8,8)
net = nn.Sequential(nn.Linear(4,8),
                    nn.ReLU(),
                    shared,
                    nn.ReLU(),
                    shared,
                    nn.ReLU(),
                    nn.Linear(8,1))

net(x)
print(net[2].weight.data[0] == net[4].weight.data[0])
net[2].weight.data[0,0] = 100
print(net[2].weight.data[0] == net[4].weight.data[0])
tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])
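
Because the same shared module appears twice in the Sequential container, net[2] and net[4] are literally the same object; their parameters therefore always stay identical, and during backpropagation their gradients are accumulated together. A minimal sketch verifying the identity:

# net[2] and net[4] refer to the same nn.Linear instance
print(net[2] is net[4])                 # True
print(net[2].weight is net[4].weight)   # True: the same Parameter object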

Custom Layers
Layers Without Parameters

import torch
from torch import nn
import torch.nn.functional as F

class CenteredLayer(nn.Module):
    def __init__(self):
        super(CenteredLayer, self).__init__()

    def forward(self,x):
        return x - x.mean()

layer = CenteredLayer()
print(layer(torch.FloatTensor([1,2,3,4,5])))
tensor([-2., -1.,  0.,  1.,  2.])

Composing it into more complex models

net = nn.Sequential(nn.Linear(8,128),CenteredLayer())
Y = net(torch.rand(4,8))
print(Y.mean())
tensor(7.4506e-09, grad_fn=<MeanBackward0>)

Layers With Parameters

class MyLinear(nn.Module):
    def __init__(self,inputs,outputs):
        super(MyLinear, self).__init__()
        self.weight = nn.Parameter(torch.randn(inputs,outputs))
        self.bias = nn.Parameter(torch.randn(outputs,))
    def forward(self,x):
        linear = torch.matmul(x,self.weight.data) + self.bias.data
        return F.relu(linear)

linear = MyLinear(5,3)
print(linear.weight)
print(linear(torch.rand(2,5)))
Parameter containing:
tensor([[ 0.6429, -0.5441, -0.3412],
        [-0.2366,  0.3360,  0.1640],
        [ 0.5280, -0.7468,  0.0318],
        [ 1.4163, -0.2793,  0.7133],
        [-1.6705, -1.8881,  1.2714]], requires_grad=True)
tensor([[0.0000, 0.0000, 2.7141],
        [0.0000, 0.0000, 1.4914]])

Building a Model with Custom Layers

net = nn.Sequential(MyLinear(64,8),MyLinear(8,1))
print(net(torch.rand(2,64)))
tensor([[3.7246],
        [3.9040]])

Summary:

  • Custom layers can be designed by subclassing the basic layer class
  • Once defined, a custom layer can be called in arbitrary contexts and network architectures
  • Layers can have local parameters, which can be created with built-in functions

Reading and Writing Files
For a single tensor, call the save and load functions directly to write and read it.

import torch
from torch import nn
from torch.nn import functional as F

x = torch.arange(4)
torch.save(x,'x_file')

x2 = torch.load('x_file')
print(x2)
tensor([0, 1, 2, 3])

Storing and Loading a List of Tensors

y = torch.ones(4)
torch.save([x,y],'x_files')
x2,y2 = torch.load('x_files')
print((x2,y2))
(tensor([0, 1, 2, 3]), tensor([1., 1., 1., 1.]))

Storing and Loading a Dictionary of Tensors

mydict = {'x':x,'y':y}
torch.save(mydict,'mydict')
mydict1 = torch.load('mydict')
print(mydict1)
{'x': tensor([0, 1, 2, 3]), 'y': tensor([1., 1., 1., 1.])}

Loading and Saving Model Parameters

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(20,256)
        self.linear2 = nn.Linear(256,10)
    def forward(self,x):
        x = self.linear1(x)
        x = F.relu(x)
        x = self.linear2(x)
        return x
net = MLP()
x = torch.randn((2,20))
y = net(x)

torch.save(net.state_dict(),'mlp.params')

clone = MLP()
clone.load_state_dict(torch.load('mlp.params'))
clone.eval()
print(clone)

y_clone = clone(x)
print(y_clone == y)

MLP(
  (linear1): Linear(in_features=20, out_features=256, bias=True)
  (linear2): Linear(in_features=256, out_features=10, bias=True)
)
tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])

Summary:

  • The save and load functions can be used to read and write tensor objects to files
  • All of a network's parameters can be saved and loaded via a parameter dictionary
  • Saving the architecture must be done in code rather than in parameters

GPU
View GPU information

!nvidia-smi

Query the number of available GPUs

torch.cuda.device_count()

Neural Networks and GPUs

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
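
Model parameters and input data must live on the same device, otherwise PyTorch raises an error. A minimal sketch, assuming the MLP class defined above:

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = MLP().to(device)                   # move the parameters onto `device`
x = torch.randn(2, 20, device=device)    # create the input directly on `device`
print(net(x).device)                     # the output is computed on `device` as well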