1、网络搭建
第一种:
class Net(torch.nn.Module):
    """Fully connected regression network: Linear -> ReLU -> Linear.

    Args:
        n_feature: Number of input features per sample.
        n_hidden: Number of units in the hidden layer.
        n_output: Number of output values per sample.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)   # hidden layer
        self.predict = torch.nn.Linear(n_hidden, n_output)   # output layer

    def forward(self, x):
        """Return the linear output of the network for the input batch ``x``."""
        x = F.relu(self.hidden(x))  # ReLU is applied to the hidden layer only
        x = self.predict(x)         # no activation on the output
        return x
# Build the network with 1 input feature, 10 hidden units and 1 output,
# then print its layer structure.
net1 = Net(n_feature=1, n_hidden=10, n_output=1)
print(net1)
Net(
(hidden): Linear(in_features=1, out_features=10, bias=True)
(predict): Linear(in_features=10, out_features=1, bias=True)
)
第二种——快速搭建:
Sequential
是一个特殊的Module,它包含几个子Module,前向传播时会将输入一层接一层地传递下去。
###### method 1: pass the layers to Sequential in order
_layers = [
    torch.nn.Linear(1, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 1),
]
net2 = torch.nn.Sequential(*_layers)
print(net2)
###### method 2: start from an empty Sequential and register each layer by name
net3 = torch.nn.Sequential()
for _name, _layer in (
    ('hidden', torch.nn.Linear(1, 10)),
    ('activation layer', torch.nn.ReLU()),
    ('predict', torch.nn.Linear(10, 1)),
):
    net3.add_module(_name, _layer)
print(net3)
#net2
Sequential(
(0): Linear(in_features=1, out_features=10, bias=True)
(1): ReLU()
(2): Linear(in_features=10, out_features=1, bias=True)
)
#net3
Sequential(
(hidden): Linear(in_features=1, out_features=10, bias=True)
(activation layer): ReLU()
(predict): Linear(in_features=10, out_features=1, bias=True)
)
2、保存模型
# Fake data: a parabola with uniform noise added.
x = torch.linspace(-1, 1, 100).unsqueeze(dim=1)  # x data (tensor), shape=(100, 1)
y = x ** 2 + torch.rand(x.size()) * 0.2          # noisy y data (tensor), shape=(100, 1)
def save():
    """Train a small regression net on the module-level ``x``/``y`` and save it.

    Only the network's ``state_dict`` is written, to ``net_params.pkl`` in the
    current working directory.
    """
    # Build the network.
    net1 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1),
    )
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.5)
    loss_func = torch.nn.MSELoss()

    # Train.
    for _ in range(100):
        prediction = net1(x)
        loss = loss_func(prediction, y)
        optimizer.zero_grad()   # clear gradients left over from the previous step
        loss.backward()         # back-propagate to compute new gradients
        optimizer.step()        # apply the gradient update

    ###### Save the model
    ###### method 1 (recommended)
    # Save only the network's parameters (fast, small memory footprint).
    torch.save(net1.state_dict(), 'net_params.pkl')
# Restore
def restore_params():
    """Rebuild the network and load the parameters stored in ``net_params.pkl``.

    The layers created here are the same Linear(1, 10) -> ReLU -> Linear(10, 1)
    stack that ``save()`` trains, so the saved parameter names and shapes line up.

    Returns:
        The ``torch.nn.Sequential`` with the saved parameters loaded.
    """
    # Build a fresh net2.
    net2 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1),
    )
    # Copy the saved parameters into net2.
    net2.load_state_dict(torch.load('net_params.pkl'))
    return net2
######method2
# NOTE(review): `net1` here is presumably the trained network built inside
# save(); this call would have to run where that network is in scope — TODO confirm.
torch.save(net1, 'net.pkl') # save the entire network
# Restore
def restore_net():
    """Load and return the entire network pickled in ``net.pkl``.

    Returns:
        The object stored in ``net.pkl`` (the whole network, not just parameters).
    """
    # NOTE: torch.load unpickles the file, so only load files you trust.
    # PyTorch >= 2.6 defaults to weights_only=True, which rejects a fully
    # pickled module; pass weights_only=False there (trusted files only).
    # Restore the entire saved network into net3.
    net3 = torch.load('net.pkl')
    return net3
3、批训练
import torch
import torch.utils.data as Data
BATCH_SIZE = 5  # number of samples per training batch

x = torch.linspace(1, 10, 10)  # x data (torch tensor)
y = torch.linspace(10, 1, 10)  # y data (torch tensor)

# First wrap the tensors in a Dataset that torch understands.
torch_dataset = Data.TensorDataset(x, y)

# Then hand the dataset to a DataLoader.
loader = Data.DataLoader(
    torch_dataset,          # torch TensorDataset format
    batch_size=BATCH_SIZE,  # mini batch size
    shuffle=True,           # whether to shuffle the data (shuffling is better)
    num_workers=2,          # number of worker subprocesses used to read the data
)
def show_batch():
    """Iterate the module-level ``loader`` for 3 epochs and print every mini-batch."""
    for epoch in range(3):  # go through the whole dataset 3 times
        # At every step the loader releases one mini-batch to learn from.
        for step, (batch_x, batch_y) in enumerate(loader):
            # Suppose this is where you train...
            # Print some of the data.
            print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',
                  batch_x.numpy(), '| batch y: ', batch_y.numpy())


# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    show_batch()
Epoch: 0 | Step: 0 | batch x: [ 7. 5. 1. 8. 10.] | batch y: [ 4. 6. 10. 3. 1.]
Epoch: 0 | Step: 1 | batch x: [4. 9. 2. 6. 3.] | batch y: [7. 2. 9. 5. 8.]
Epoch: 1 | Step: 0 | batch x: [3. 2. 7. 5. 1.] | batch y: [ 8. 9. 4. 6. 10.]
Epoch: 1 | Step: 1 | batch x: [ 9. 4. 8. 6. 10.] | batch y: [2. 7. 3. 5. 1.]
Epoch: 2 | Step: 0 | batch x: [10. 8. 3. 4. 5.] | batch y: [1. 3. 8. 7. 6.]
Epoch: 2 | Step: 1 | batch x: [1. 9. 6. 2. 7.] | batch y: [10. 2. 5. 9. 4.]
torch.utils.data.TensorDataset(data_tensor, target_tensor)
包装数据和目标张量的数据集.
通过沿着第一个维度索引两个张量来恢复每个样本.
Parameters:
data_tensor (Tensor) – 包含样本数据.
target_tensor (Tensor) – 包含样本目标 (标签).
torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False,
sampler=None, batch_sampler=None,
num_workers=0,
collate_fn=<function default_collate at 0x4316c08>,
pin_memory=False, drop_last=False)
数据加载器. 组合数据集和采样器,并在数据集上提供单进程或多进程迭代器.
Parameters:
dataset
(Dataset) – 从该数据集中加载数据.
batch_size
(int, optional)– 每个 batch 加载多少个样本 (默认值: 1).
shuffle
(bool, optional)– 设置为 True 时, 会在每个 epoch 重新打乱数据 (默认值: False).
sampler (Sampler, optional) – 定义从数据集中提取样本的策略. 如果指定, shuffle 值必须为 False.
batch_sampler (Sampler, optional) – 与 sampler 相似, 但一次返回一批索引. 与 batch_size, shuffle, sampler 和 drop_last 互斥.
num_workers
(int, optional) – 用多少个子进程加载数据. 0表示数据将在主进程中加载 (默认值: 0)
collate_fn (callable, optional) – 合并样本列表以形成一个 mini-batch.
pin_memory (bool, optional) – 如果为 True, 数据加载器会将张量复制到 CUDA 固定内存中, 然后再返回它们.
drop_last (bool, optional) – 设定为 True 时, 如果数据集大小不能被 batch size 整除, 则丢掉最后一个不完整的 batch. 设定为 False 并且数据集大小不能被 batch size 整除时, 最后一个 batch 将会更小 (默认值: False). 例如: 上面的数据集共 10 个样本, 若 batch_size=8 且 drop_last=False, 则 step 1 中 batch x 的 size=2.
4、Optimizer优化器
import torch
import torch.utils.data as Data
import torch.nn.functional as F
import matplotlib.pyplot as plt
# Hyper-parameters
LR = 0.01
BATCH_SIZE = 32
EPOCH = 12

# fake dataset: a parabola with Gaussian noise added
x = torch.linspace(-1, 1, 1000).unsqueeze(dim=1)
y = x ** 2 + torch.normal(torch.zeros(*x.size())) * 0.1

# plot dataset
#plt.scatter(x.numpy(), y.numpy())
#plt.show()

# data loader
torch_dataset = Data.TensorDataset(x, y)
loader = Data.DataLoader(
    torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
# The default way of defining a network
class Net(torch.nn.Module):
    """Regression network with one hidden layer: Linear(1, 20) -> ReLU -> Linear(20, 1)."""

    def __init__(self):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(1, 20)   # hidden layer
        self.predict = torch.nn.Linear(20, 1)  # output layer

    def forward(self, x):
        """Return the linear output of the network for the input batch ``x``."""
        x = F.relu(self.hidden(x))  # activation function for hidden layer
        x = self.predict(x)         # linear output
        return x
if __name__ == '__main__':
    # Create one net per optimizer so that each optimizer trains its own copy.
    # different nets
    net_SGD = Net()
    net_Momentum = Net()
    net_RMSprop = Net()
    net_Adam = Net()
    nets = [net_SGD, net_Momentum, net_RMSprop, net_Adam]  # the 4 networks in one list

    # different optimizers
    opt_SGD = torch.optim.SGD(net_SGD.parameters(), lr=LR)
    opt_Momentum = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
    opt_RMSprop = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
    opt_Adam = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
    optimizers = [opt_SGD, opt_Momentum, opt_RMSprop, opt_Adam]  # the 4 optimizers in one list

    loss_func = torch.nn.MSELoss()
    losses_his = [[], [], [], []]  # record loss, one history list per net

    # training
    for epoch in range(EPOCH):
        print('Epoch: ', epoch)
        for step, (b_x, b_y) in enumerate(loader):  # for each training step
            # Every net sees the same mini-batch, so the loss curves are comparable.
            for net, opt, l_his in zip(nets, optimizers, losses_his):
                output = net(b_x)              # get output for every net
                loss = loss_func(output, b_y)  # compute loss for every net
                opt.zero_grad()                # clear gradients for next train
                loss.backward()                # backpropagation, compute gradients
                opt.step()                     # apply gradients
                l_his.append(loss.item())      # loss recorder (plain Python float)

    # Plot one loss curve per optimizer.
    labels = ['SGD', 'Momentum', 'RMSprop', 'Adam']
    for label, l_his in zip(labels, losses_his):
        plt.plot(l_his, label=label)
    plt.legend(loc='best')
    plt.xlabel('Steps')
    plt.ylabel('Loss')
    plt.ylim((0, 0.2))
    plt.show()
torch.optim.SGD(params, lr=<object object>, momentum=0, dampening=0, weight_decay=0, nesterov=False)
实现随机梯度下降算法( momentum 可选)
Args:
params
(iterable): 待优化参数的可迭代对象, 或者是定义了参数组的 dict, 例如: model.parameters()
lr
(float): 学习率
dampening
(float, optional): 动量的抑制因子 (默认值: 0)
weight_decay
(float, optional): 权重衰减 (L2 正则化) (默认值: 0)
nesterov
(bool, optional): 使用 Nesterov 动量 (默认值: False)
torch.optim.RMSprop(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
实现 RMSprop 算法.
Args:
alpha
(float, optional): 平滑常量 (default: 0.99)
eps
(float, optional): 为了增加数值计算的稳定性而加到分母里的项 (默认值: 1e-8)
centered
(bool, optional) : 如果为 True, 计算中心化的 RMSProp, 即用梯度方差的估计值对梯度进行归一化
torch.optim.Adamax(params, lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
实现 Adamax 算法 ( Adam 的一种基于无穷范数的变种).
Args:
betas
(Tuple[float, float], optional): 用来计算梯度及其平方的滑动平均值的系数
2018.09.05 整理于莫烦Python教程
https://morvanzhou.github.io/tutorials/machine-learning/torch/3-03-fast-nn/