Pytorch 学习笔记(三)——tips

1、网络搭建

第一种

class Net(torch.nn.Module):
    """Fully connected network with one hidden layer: Linear -> ReLU -> Linear.

    Args:
        n_feature: number of input features of the hidden layer.
        n_hidden: number of units produced by the hidden layer.
        n_output: number of output features of the prediction layer.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        """Apply the hidden layer with ReLU, then the linear prediction layer."""
        activated = F.relu(self.hidden(x))
        return self.predict(activated)


# Build a 1 -> 10 -> 1 network and display its layers.
net1 = Net(n_feature=1, n_hidden=10, n_output=1)
print(net1)
Net(
  (hidden): Linear(in_features=1, out_features=10, bias=True)
  (predict): Linear(in_features=10, out_features=1, bias=True)
)

第二种——快速搭建:
Sequential是一个特殊的Module,它包含几个子Module,前向传播时会将输入一层接一层地传递下去。

# Method 1: hand the layers straight to the Sequential constructor.
# print(net2) lists them under the index names 0, 1, 2.
net2 = torch.nn.Sequential(torch.nn.Linear(1, 10), torch.nn.ReLU(), torch.nn.Linear(10, 1))
print(net2)

# Method 2: start from an empty Sequential and register every layer under
# an explicit name; print(net3) shows these names in place of indices.
net3 = torch.nn.Sequential()
for layer_name, layer in (
    ('hidden', torch.nn.Linear(1, 10)),
    ('activation layer', torch.nn.ReLU()),
    ('predict', torch.nn.Linear(10, 1)),
):
    net3.add_module(layer_name, layer)
print(net3)
#net2
Sequential(
  (0): Linear(in_features=1, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=1, bias=True)
)

#net3
Sequential(
  (hidden): Linear(in_features=1, out_features=10, bias=True)
  (activation layer): ReLU()
  (predict): Linear(in_features=10, out_features=1, bias=True)
)

2、保存模型

# Fake data: 100 points of y = x^2 plus uniform noise in [0, 0.2).
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2 * torch.rand(x.size())  # noisy y data (tensor), shape=(100, 1)


def save():
    """Train a small regression network on (x, y) and save its parameters.

    The trained network's ``state_dict`` is written to ``net_params.pkl``
    in the current working directory.
    """
    # Build the network.
    net1 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.5)
    loss_func = torch.nn.MSELoss()

    # Train for 100 full-batch steps.
    for _ in range(100):
        prediction = net1(x)
        loss = loss_func(prediction, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Save the model, method 1 (recommended): store only the parameters,
    # which is faster and takes less space than saving the whole network.
    torch.save(net1.state_dict(), 'net_params.pkl')

# Restore (method 1)
def restore_params():
    """Rebuild the network and load the parameters stored in ``net_params.pkl``.

    The file read here holds only a ``state_dict``, so the layers are
    recreated first and the saved values are then copied into them.

    Returns:
        The newly built network with the saved parameters loaded.
    """
    # Create net2 with the layer layout the saved parameters are loaded into.
    net2 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )

    # Copy the saved parameters into net2.
    net2.load_state_dict(torch.load('net_params.pkl'))
    # Hand the restored network back so the caller can actually use it.
    return net2


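######method2
# NOTE: this line belongs inside save(), after the training loop (net1 is the network trained there).
    torch.save(net1, 'net.pkl')  # 保存整个网络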

# Restore (method 2)
def restore_net():
    """Load the entire network stored in ``net.pkl``.

    Returns:
        The network object unpickled from the file.
    """
    # NOTE: loading a whole module unpickles arbitrary objects, so only load
    # files that come from a trusted source.
    try:
        # PyTorch >= 2.6 defaults to weights_only=True, which refuses to
        # unpickle a full nn.Module; opt out explicitly.
        net3 = torch.load('net.pkl', weights_only=False)
    except TypeError:
        # Older PyTorch releases do not accept the weights_only argument.
        net3 = torch.load('net.pkl')
    return net3

3、批训练


import torch
import torch.utils.data as Data

BATCH_SIZE = 5  # number of samples in each mini-batch

# Toy data: x runs from 1 up to 10 while y runs from 10 down to 1.
x = torch.linspace(start=1, end=10, steps=10)
y = torch.linspace(start=10, end=1, steps=10)

# Wrap both tensors in a Dataset so that torch can index them together.
torch_dataset = Data.TensorDataset(x, y)

# Put the dataset into a DataLoader that serves it in mini-batches.
loader = Data.DataLoader(
    torch_dataset,
    BATCH_SIZE,
    shuffle=True,    # shuffle the data (usually the better choice)
    num_workers=2,   # read the data with 2 worker subprocesses
)
def show_batch():
    """Iterate over ``loader`` for three epochs and print every mini-batch."""
    n_epochs = 3  # go through the whole dataset three times
    for epoch in range(n_epochs):
        # On every step the loader releases one mini-batch.
        for step, (batch_x, batch_y) in enumerate(loader):
            # Real training code would go here; this demo only prints the data.
            xs = batch_x.numpy()
            ys = batch_y.numpy()
            print('Epoch: ', epoch, '| Step: ', step, '| batch x: ', xs,
                  '| batch y: ', ys)


# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    show_batch()
Epoch:  0 | Step:  0 | batch x:  [ 7.  5.  1.  8. 10.] | batch y:  [ 4.  6. 10.  3.  1.]
Epoch:  0 | Step:  1 | batch x:  [4. 9. 2. 6. 3.] | batch y:  [7. 2. 9. 5. 8.]
Epoch:  1 | Step:  0 | batch x:  [3. 2. 7. 5. 1.] | batch y:  [ 8.  9.  4.  6. 10.]
Epoch:  1 | Step:  1 | batch x:  [ 9.  4.  8.  6. 10.] | batch y:  [2. 7. 3. 5. 1.]
Epoch:  2 | Step:  0 | batch x:  [10.  8.  3.  4.  5.] | batch y:  [1. 3. 8. 7. 6.]
Epoch:  2 | Step:  1 | batch x:  [1. 9. 6. 2. 7.] | batch y:  [10.  2.  5.  9.  4.]

torch.utils.data.TensorDataset(data_tensor, target_tensor)
包装数据和目标张量的数据集.
通过沿着第一个维度索引两个张量来恢复每个样本.
Parameters:
data_tensor (Tensor) – 包含样本数据.
target_tensor (Tensor) – 包含样本目标 (标签).

torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False,
                             sampler=None, batch_sampler=None, 
                             num_workers=0, 
                             collate_fn=<function default_collate at 0x4316c08>,
                             pin_memory=False, drop_last=False)

数据加载器. 组合数据集和采样器,并在数据集上提供单进程或多进程迭代器.
Parameters:
dataset (Dataset) – 从该数据集中加载数据.
batch_size(int, optional)– 每个 batch 加载多少个样本 (默认值: 1).
shuffle(bool, optional)– 设置为 True 时, 会在每个 epoch 重新打乱数据 (默认值: False).
sampler (Sampler, optional) – 定义从数据集中提取样本的策略. 如果指定, shuffle 值必须为 False.
batch_sampler (Sampler, optional) – 与 sampler 相似, 但一次返回一批索引. 与 batch_size, shuffle, sampler 和 drop_last 互斥.
num_workers (int, optional) – 用多少个子进程加载数据. 0表示数据将在主进程中加载 (默认值: 0)
collate_fn (callable, optional) – 合并样本列表以形成一个 mini-batch.
pin_memory (bool, optional) – 如果为 True, 数据加载器会将张量复制到 CUDA 固定内存中, 然后再返回它们.
drop_last (bool, optional) – 如果数据集大小不能被 batch size 整除, 设定为 True 会丢掉最后一个不完整的 batch; 设定为 False 则最后一个 batch 将会更小 (默认值: False). 例如: 上面的例子共有 10 个样本, 若 batch_size=8, 则 step 1 中 batch x 的 size=2.

4、Optimizer优化器

import torch
import torch.utils.data as Data
import torch.nn.functional as F
import matplotlib.pyplot as plt

# Hyper-parameters of the optimizer comparison.
LR = 0.01        # learning rate
BATCH_SIZE = 32  # samples per mini-batch
EPOCH = 12       # passes over the whole dataset

# Fake dataset: 1000 points of y = x^2 plus Gaussian noise scaled by 0.1.
x = torch.linspace(-1, 1, 1000).unsqueeze(dim=1)
y = x.pow(2) + 0.1 * torch.normal(torch.zeros(*x.size()))

# Optional: uncomment to plot the dataset.
# plt.scatter(x.numpy(), y.numpy())
# plt.show()

# Data loader serving the dataset in shuffled mini-batches.
torch_dataset = Data.TensorDataset(x, y)
loader = Data.DataLoader(
    torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

# The default (class-based) form of a network
class Net(torch.nn.Module):
    """Regression network with a single hidden layer: 1 -> 20 -> 1."""

    def __init__(self):
        super().__init__()
        self.hidden = torch.nn.Linear(1, 20)   # hidden layer
        self.predict = torch.nn.Linear(20, 1)  # output layer

    def forward(self, x):
        """Return the network output for input ``x``."""
        hidden_out = F.relu(self.hidden(x))  # activation function for hidden layer
        return self.predict(hidden_out)      # linear output


if __name__ == '__main__':
    # Create one net per optimizer so that every optimizer trains its own,
    # independent copy of the same architecture.
    net_SGD = Net()
    net_Momentum = Net()
    net_RMSprop = Net()
    net_Adam = Net()
    nets = [net_SGD, net_Momentum, net_RMSprop, net_Adam]  # the 4 nets in one list

    # The optimizers under comparison, each bound to its own net.
    opt_SGD = torch.optim.SGD(net_SGD.parameters(), lr=LR)
    opt_Momentum = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
    opt_RMSprop = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
    opt_Adam = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
    optimizers = [opt_SGD, opt_Momentum, opt_RMSprop, opt_Adam]  # the 4 optimizers in one list

    loss_func = torch.nn.MSELoss()
    losses_his = [[], [], [], []]  # one loss history per optimizer

    # Training: every mini-batch is fed to all four nets in turn.
    for epoch in range(EPOCH):
        print('Epoch: ', epoch)
        for b_x, b_y in loader:  # for each training step
            for net, opt, l_his in zip(nets, optimizers, losses_his):
                output = net(b_x)              # get output for every net
                loss = loss_func(output, b_y)  # compute loss for every net
                opt.zero_grad()                # clear gradients for next train
                loss.backward()                # backpropagation, compute gradients
                opt.step()                     # apply gradients
                # Record the loss as a plain Python float; .item() replaces
                # the discouraged ``loss.data.numpy()``.
                l_his.append(loss.item())

    # Plot the loss curve of every optimizer in one figure.
    labels = ['SGD', 'Momentum', 'RMSprop', 'Adam']
    for label, l_his in zip(labels, losses_his):
        plt.plot(l_his, label=label)

    plt.legend(loc='best')
    plt.xlabel('Steps')
    plt.ylabel('Loss')
    plt.ylim((0, 0.2))
    plt.show()

这里写图片描述

torch.optim.SGD(params, lr=<required parameter>, momentum=0, dampening=0, weight_decay=0, nesterov=False)

实现随机梯度下降算法( momentum 可选)
Args:
params(iterable): 由待优化参数组成的可迭代对象, 或者是定义了参数组的 dict, eg: model.parameters()
lr (float): 学习率
momentum (float, optional): 动量因子 (默认值: 0)
dampening(float, optional): 动量的抑制因子 (默认值: 0)
weight_decay(float, optional): 权重衰减 (L2 正则化) (默认值: 0)
nesterov (bool, optional): 使用 Nesterov 动量 (默认值: False)

torch.optim.RMSprop(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)

实现 RMSprop 算法.
Args:
alpha(float, optional): 平滑常量 (default: 0.99)
eps(float, optional): 为了增加数值计算的稳定性而加到分母里的项 (默认值: 1e-8)
centered (bool, optional) : 如果为 True, 则计算中心化的 RMSProp, 即用梯度方差的估计值对梯度进行归一化

torch.optim.Adamax(params, lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)

实现 Adamax 算法 ( Adam 的一种基于无穷范数的变种).
Args:
betas (Tuple[float, float], optional): 用来计算梯度及其平方的滑动平均值的系数

2018.09.05 整理于莫烦Python教程
https://morvanzhou.github.io/tutorials/machine-learning/torch/3-03-fast-nn/

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值